1#!/usr/bin/python 2''' 3From gdb 7 onwards, gdb's build can be configured --with-python, allowing gdb 4to be extended with Python code e.g. for library-specific data visualizations, 5such as for the C++ STL types. Documentation on this API can be seen at: 6http://sourceware.org/gdb/current/onlinedocs/gdb/Python-API.html 7 8 9This python module deals with the case when the process being debugged (the 10"inferior process" in gdb parlance) is itself python, or more specifically, 11linked against libpython. In this situation, almost every item of data is a 12(PyObject*), and having the debugger merely print their addresses is not very 13enlightening. 14 15This module embeds knowledge about the implementation details of libpython so 16that we can emit useful visualizations e.g. a string, a list, a dict, a frame 17giving file/line information and the state of local variables 18 19In particular, given a gdb.Value corresponding to a PyObject* in the inferior 20process, we can generate a "proxy value" within the gdb process. For example, 21given a PyObject* in the inferior process that is in fact a PyListObject* 22holding three PyObject* that turn out to be PyBytesObject* instances, we can 23generate a proxy value within the gdb process that is a list of bytes 24instances: 25 [b"foo", b"bar", b"baz"] 26 27Doing so can be expensive for complicated graphs of objects, and could take 28some time, so we also have a "write_repr" method that writes a representation 29of the data to a file-like object. This allows us to stop the traversal by 30having the file-like object raise an exception if it gets too much data. 31 32With both "proxyval" and "write_repr" we keep track of the set of all addresses 33visited so far in the traversal, to avoid infinite recursion due to cycles in 34the graph of object references. 35 36We try to defer gdb.lookup_type() invocations for python types until as late as 37possible: for a dynamically linked python binary, when the process starts in 38the debugger, the libpython.so hasn't been dynamically loaded yet, so none of 39the type names are known to the debugger 40 41The module also extends gdb with some python-specific commands. 42''' 43 44# NOTE: some gdbs are linked with Python 3, so this file should be dual-syntax 45# compatible (2.6+ and 3.0+). See #19308. 46 47from __future__ import print_function 48import gdb 49import os 50import locale 51import sys 52 53if sys.version_info[0] >= 3: 54 unichr = chr 55 xrange = range 56 long = int 57 58# Look up the gdb.Type for some standard types: 59# Those need to be refreshed as types (pointer sizes) may change when 60# gdb loads different executables 61 62def _type_char_ptr(): 63 return gdb.lookup_type('char').pointer() # char* 64 65 66def _type_unsigned_char_ptr(): 67 return gdb.lookup_type('unsigned char').pointer() # unsigned char* 68 69 70def _type_unsigned_short_ptr(): 71 return gdb.lookup_type('unsigned short').pointer() 72 73 74def _type_unsigned_int_ptr(): 75 return gdb.lookup_type('unsigned int').pointer() 76 77 78def _sizeof_void_p(): 79 return gdb.lookup_type('void').pointer().sizeof 80 81 82# value computed later, see PyUnicodeObjectPtr.proxy() 83_is_pep393 = None 84 85Py_TPFLAGS_HEAPTYPE = (1 << 9) 86Py_TPFLAGS_LONG_SUBCLASS = (1 << 24) 87Py_TPFLAGS_LIST_SUBCLASS = (1 << 25) 88Py_TPFLAGS_TUPLE_SUBCLASS = (1 << 26) 89Py_TPFLAGS_BYTES_SUBCLASS = (1 << 27) 90Py_TPFLAGS_UNICODE_SUBCLASS = (1 << 28) 91Py_TPFLAGS_DICT_SUBCLASS = (1 << 29) 92Py_TPFLAGS_BASE_EXC_SUBCLASS = (1 << 30) 93Py_TPFLAGS_TYPE_SUBCLASS = (1 << 31) 94 95 96MAX_OUTPUT_LEN=1024 97 98hexdigits = "0123456789abcdef" 99 100ENCODING = locale.getpreferredencoding() 101 102FRAME_INFO_OPTIMIZED_OUT = '(frame information optimized out)' 103UNABLE_READ_INFO_PYTHON_FRAME = 'Unable to read information on python frame' 104EVALFRAME = '_PyEval_EvalFrameDefault' 105 106class NullPyObjectPtr(RuntimeError): 107 pass 108 109 110def safety_limit(val): 111 # Given an integer value from the process being debugged, limit it to some 112 # safety threshold so that arbitrary breakage within said process doesn't 113 # break the gdb process too much (e.g. sizes of iterations, sizes of lists) 114 return min(val, 1000) 115 116 117def safe_range(val): 118 # As per range, but don't trust the value too much: cap it to a safety 119 # threshold in case the data was corrupted 120 return xrange(safety_limit(int(val))) 121 122if sys.version_info[0] >= 3: 123 def write_unicode(file, text): 124 file.write(text) 125else: 126 def write_unicode(file, text): 127 # Write a byte or unicode string to file. Unicode strings are encoded to 128 # ENCODING encoding with 'backslashreplace' error handler to avoid 129 # UnicodeEncodeError. 130 if isinstance(text, unicode): 131 text = text.encode(ENCODING, 'backslashreplace') 132 file.write(text) 133 134try: 135 os_fsencode = os.fsencode 136except AttributeError: 137 def os_fsencode(filename): 138 if not isinstance(filename, unicode): 139 return filename 140 encoding = sys.getfilesystemencoding() 141 if encoding == 'mbcs': 142 # mbcs doesn't support surrogateescape 143 return filename.encode(encoding) 144 encoded = [] 145 for char in filename: 146 # surrogateescape error handler 147 if 0xDC80 <= ord(char) <= 0xDCFF: 148 byte = chr(ord(char) - 0xDC00) 149 else: 150 byte = char.encode(encoding) 151 encoded.append(byte) 152 return ''.join(encoded) 153 154class StringTruncated(RuntimeError): 155 pass 156 157class TruncatedStringIO(object): 158 '''Similar to io.StringIO, but can truncate the output by raising a 159 StringTruncated exception''' 160 def __init__(self, maxlen=None): 161 self._val = '' 162 self.maxlen = maxlen 163 164 def write(self, data): 165 if self.maxlen: 166 if len(data) + len(self._val) > self.maxlen: 167 # Truncation: 168 self._val += data[0:self.maxlen - len(self._val)] 169 raise StringTruncated() 170 171 self._val += data 172 173 def getvalue(self): 174 return self._val 175 176class PyObjectPtr(object): 177 """ 178 Class wrapping a gdb.Value that's either a (PyObject*) within the 179 inferior process, or some subclass pointer e.g. (PyBytesObject*) 180 181 There will be a subclass for every refined PyObject type that we care 182 about. 183 184 Note that at every stage the underlying pointer could be NULL, point 185 to corrupt data, etc; this is the debugger, after all. 186 """ 187 _typename = 'PyObject' 188 189 def __init__(self, gdbval, cast_to=None): 190 if cast_to: 191 self._gdbval = gdbval.cast(cast_to) 192 else: 193 self._gdbval = gdbval 194 195 def field(self, name): 196 ''' 197 Get the gdb.Value for the given field within the PyObject, coping with 198 some python 2 versus python 3 differences. 199 200 Various libpython types are defined using the "PyObject_HEAD" and 201 "PyObject_VAR_HEAD" macros. 202 203 In Python 2, this these are defined so that "ob_type" and (for a var 204 object) "ob_size" are fields of the type in question. 205 206 In Python 3, this is defined as an embedded PyVarObject type thus: 207 PyVarObject ob_base; 208 so that the "ob_size" field is located insize the "ob_base" field, and 209 the "ob_type" is most easily accessed by casting back to a (PyObject*). 210 ''' 211 if self.is_null(): 212 raise NullPyObjectPtr(self) 213 214 if name == 'ob_type': 215 pyo_ptr = self._gdbval.cast(PyObjectPtr.get_gdb_type()) 216 return pyo_ptr.dereference()[name] 217 218 if name == 'ob_size': 219 pyo_ptr = self._gdbval.cast(PyVarObjectPtr.get_gdb_type()) 220 return pyo_ptr.dereference()[name] 221 222 # General case: look it up inside the object: 223 return self._gdbval.dereference()[name] 224 225 def pyop_field(self, name): 226 ''' 227 Get a PyObjectPtr for the given PyObject* field within this PyObject, 228 coping with some python 2 versus python 3 differences. 229 ''' 230 return PyObjectPtr.from_pyobject_ptr(self.field(name)) 231 232 def write_field_repr(self, name, out, visited): 233 ''' 234 Extract the PyObject* field named "name", and write its representation 235 to file-like object "out" 236 ''' 237 field_obj = self.pyop_field(name) 238 field_obj.write_repr(out, visited) 239 240 def get_truncated_repr(self, maxlen): 241 ''' 242 Get a repr-like string for the data, but truncate it at "maxlen" bytes 243 (ending the object graph traversal as soon as you do) 244 ''' 245 out = TruncatedStringIO(maxlen) 246 try: 247 self.write_repr(out, set()) 248 except StringTruncated: 249 # Truncation occurred: 250 return out.getvalue() + '...(truncated)' 251 252 # No truncation occurred: 253 return out.getvalue() 254 255 def type(self): 256 return PyTypeObjectPtr(self.field('ob_type')) 257 258 def is_null(self): 259 return 0 == long(self._gdbval) 260 261 def is_optimized_out(self): 262 ''' 263 Is the value of the underlying PyObject* visible to the debugger? 264 265 This can vary with the precise version of the compiler used to build 266 Python, and the precise version of gdb. 267 268 See e.g. https://bugzilla.redhat.com/show_bug.cgi?id=556975 with 269 PyEval_EvalFrameEx's "f" 270 ''' 271 return self._gdbval.is_optimized_out 272 273 def safe_tp_name(self): 274 try: 275 ob_type = self.type() 276 tp_name = ob_type.field('tp_name') 277 return tp_name.string() 278 # NullPyObjectPtr: NULL tp_name? 279 # RuntimeError: Can't even read the object at all? 280 # UnicodeDecodeError: Failed to decode tp_name bytestring 281 except (NullPyObjectPtr, RuntimeError, UnicodeDecodeError): 282 return 'unknown' 283 284 def proxyval(self, visited): 285 ''' 286 Scrape a value from the inferior process, and try to represent it 287 within the gdb process, whilst (hopefully) avoiding crashes when 288 the remote data is corrupt. 289 290 Derived classes will override this. 291 292 For example, a PyIntObject* with ob_ival 42 in the inferior process 293 should result in an int(42) in this process. 294 295 visited: a set of all gdb.Value pyobject pointers already visited 296 whilst generating this value (to guard against infinite recursion when 297 visiting object graphs with loops). Analogous to Py_ReprEnter and 298 Py_ReprLeave 299 ''' 300 301 class FakeRepr(object): 302 """ 303 Class representing a non-descript PyObject* value in the inferior 304 process for when we don't have a custom scraper, intended to have 305 a sane repr(). 306 """ 307 308 def __init__(self, tp_name, address): 309 self.tp_name = tp_name 310 self.address = address 311 312 def __repr__(self): 313 # For the NULL pointer, we have no way of knowing a type, so 314 # special-case it as per 315 # http://bugs.python.org/issue8032#msg100882 316 if self.address == 0: 317 return '0x0' 318 return '<%s at remote 0x%x>' % (self.tp_name, self.address) 319 320 return FakeRepr(self.safe_tp_name(), 321 long(self._gdbval)) 322 323 def write_repr(self, out, visited): 324 ''' 325 Write a string representation of the value scraped from the inferior 326 process to "out", a file-like object. 327 ''' 328 # Default implementation: generate a proxy value and write its repr 329 # However, this could involve a lot of work for complicated objects, 330 # so for derived classes we specialize this 331 return out.write(repr(self.proxyval(visited))) 332 333 @classmethod 334 def subclass_from_type(cls, t): 335 ''' 336 Given a PyTypeObjectPtr instance wrapping a gdb.Value that's a 337 (PyTypeObject*), determine the corresponding subclass of PyObjectPtr 338 to use 339 340 Ideally, we would look up the symbols for the global types, but that 341 isn't working yet: 342 (gdb) python print gdb.lookup_symbol('PyList_Type')[0].value 343 Traceback (most recent call last): 344 File "<string>", line 1, in <module> 345 NotImplementedError: Symbol type not yet supported in Python scripts. 346 Error while executing Python code. 347 348 For now, we use tp_flags, after doing some string comparisons on the 349 tp_name for some special-cases that don't seem to be visible through 350 flags 351 ''' 352 try: 353 tp_name = t.field('tp_name').string() 354 tp_flags = int(t.field('tp_flags')) 355 # RuntimeError: NULL pointers 356 # UnicodeDecodeError: string() fails to decode the bytestring 357 except (RuntimeError, UnicodeDecodeError): 358 # Handle any kind of error e.g. NULL ptrs by simply using the base 359 # class 360 return cls 361 362 #print('tp_flags = 0x%08x' % tp_flags) 363 #print('tp_name = %r' % tp_name) 364 365 name_map = {'bool': PyBoolObjectPtr, 366 'classobj': PyClassObjectPtr, 367 'NoneType': PyNoneStructPtr, 368 'frame': PyFrameObjectPtr, 369 'set' : PySetObjectPtr, 370 'frozenset' : PySetObjectPtr, 371 'builtin_function_or_method' : PyCFunctionObjectPtr, 372 'method-wrapper': wrapperobject, 373 } 374 if tp_name in name_map: 375 return name_map[tp_name] 376 377 if tp_flags & Py_TPFLAGS_HEAPTYPE: 378 return HeapTypeObjectPtr 379 380 if tp_flags & Py_TPFLAGS_LONG_SUBCLASS: 381 return PyLongObjectPtr 382 if tp_flags & Py_TPFLAGS_LIST_SUBCLASS: 383 return PyListObjectPtr 384 if tp_flags & Py_TPFLAGS_TUPLE_SUBCLASS: 385 return PyTupleObjectPtr 386 if tp_flags & Py_TPFLAGS_BYTES_SUBCLASS: 387 return PyBytesObjectPtr 388 if tp_flags & Py_TPFLAGS_UNICODE_SUBCLASS: 389 return PyUnicodeObjectPtr 390 if tp_flags & Py_TPFLAGS_DICT_SUBCLASS: 391 return PyDictObjectPtr 392 if tp_flags & Py_TPFLAGS_BASE_EXC_SUBCLASS: 393 return PyBaseExceptionObjectPtr 394 #if tp_flags & Py_TPFLAGS_TYPE_SUBCLASS: 395 # return PyTypeObjectPtr 396 397 # Use the base class: 398 return cls 399 400 @classmethod 401 def from_pyobject_ptr(cls, gdbval): 402 ''' 403 Try to locate the appropriate derived class dynamically, and cast 404 the pointer accordingly. 405 ''' 406 try: 407 p = PyObjectPtr(gdbval) 408 cls = cls.subclass_from_type(p.type()) 409 return cls(gdbval, cast_to=cls.get_gdb_type()) 410 except RuntimeError: 411 # Handle any kind of error e.g. NULL ptrs by simply using the base 412 # class 413 pass 414 return cls(gdbval) 415 416 @classmethod 417 def get_gdb_type(cls): 418 return gdb.lookup_type(cls._typename).pointer() 419 420 def as_address(self): 421 return long(self._gdbval) 422 423class PyVarObjectPtr(PyObjectPtr): 424 _typename = 'PyVarObject' 425 426class ProxyAlreadyVisited(object): 427 ''' 428 Placeholder proxy to use when protecting against infinite recursion due to 429 loops in the object graph. 430 431 Analogous to the values emitted by the users of Py_ReprEnter and Py_ReprLeave 432 ''' 433 def __init__(self, rep): 434 self._rep = rep 435 436 def __repr__(self): 437 return self._rep 438 439 440def _write_instance_repr(out, visited, name, pyop_attrdict, address): 441 '''Shared code for use by all classes: 442 write a representation to file-like object "out"''' 443 out.write('<') 444 out.write(name) 445 446 # Write dictionary of instance attributes: 447 if isinstance(pyop_attrdict, PyDictObjectPtr): 448 out.write('(') 449 first = True 450 for pyop_arg, pyop_val in pyop_attrdict.iteritems(): 451 if not first: 452 out.write(', ') 453 first = False 454 out.write(pyop_arg.proxyval(visited)) 455 out.write('=') 456 pyop_val.write_repr(out, visited) 457 out.write(')') 458 out.write(' at remote 0x%x>' % address) 459 460 461class InstanceProxy(object): 462 463 def __init__(self, cl_name, attrdict, address): 464 self.cl_name = cl_name 465 self.attrdict = attrdict 466 self.address = address 467 468 def __repr__(self): 469 if isinstance(self.attrdict, dict): 470 kwargs = ', '.join(["%s=%r" % (arg, val) 471 for arg, val in self.attrdict.items()]) 472 return '<%s(%s) at remote 0x%x>' % (self.cl_name, 473 kwargs, self.address) 474 else: 475 return '<%s at remote 0x%x>' % (self.cl_name, 476 self.address) 477 478def _PyObject_VAR_SIZE(typeobj, nitems): 479 if _PyObject_VAR_SIZE._type_size_t is None: 480 _PyObject_VAR_SIZE._type_size_t = gdb.lookup_type('size_t') 481 482 return ( ( typeobj.field('tp_basicsize') + 483 nitems * typeobj.field('tp_itemsize') + 484 (_sizeof_void_p() - 1) 485 ) & ~(_sizeof_void_p() - 1) 486 ).cast(_PyObject_VAR_SIZE._type_size_t) 487_PyObject_VAR_SIZE._type_size_t = None 488 489class HeapTypeObjectPtr(PyObjectPtr): 490 _typename = 'PyObject' 491 492 def get_attr_dict(self): 493 ''' 494 Get the PyDictObject ptr representing the attribute dictionary 495 (or None if there's a problem) 496 ''' 497 try: 498 typeobj = self.type() 499 dictoffset = int_from_int(typeobj.field('tp_dictoffset')) 500 if dictoffset != 0: 501 if dictoffset < 0: 502 type_PyVarObject_ptr = gdb.lookup_type('PyVarObject').pointer() 503 tsize = int_from_int(self._gdbval.cast(type_PyVarObject_ptr)['ob_size']) 504 if tsize < 0: 505 tsize = -tsize 506 size = _PyObject_VAR_SIZE(typeobj, tsize) 507 dictoffset += size 508 assert dictoffset > 0 509 assert dictoffset % _sizeof_void_p() == 0 510 511 dictptr = self._gdbval.cast(_type_char_ptr()) + dictoffset 512 PyObjectPtrPtr = PyObjectPtr.get_gdb_type().pointer() 513 dictptr = dictptr.cast(PyObjectPtrPtr) 514 return PyObjectPtr.from_pyobject_ptr(dictptr.dereference()) 515 except RuntimeError: 516 # Corrupt data somewhere; fail safe 517 pass 518 519 # Not found, or some kind of error: 520 return None 521 522 def proxyval(self, visited): 523 ''' 524 Support for classes. 525 526 Currently we just locate the dictionary using a transliteration to 527 python of _PyObject_GetDictPtr, ignoring descriptors 528 ''' 529 # Guard against infinite loops: 530 if self.as_address() in visited: 531 return ProxyAlreadyVisited('<...>') 532 visited.add(self.as_address()) 533 534 pyop_attr_dict = self.get_attr_dict() 535 if pyop_attr_dict: 536 attr_dict = pyop_attr_dict.proxyval(visited) 537 else: 538 attr_dict = {} 539 tp_name = self.safe_tp_name() 540 541 # Class: 542 return InstanceProxy(tp_name, attr_dict, long(self._gdbval)) 543 544 def write_repr(self, out, visited): 545 # Guard against infinite loops: 546 if self.as_address() in visited: 547 out.write('<...>') 548 return 549 visited.add(self.as_address()) 550 551 pyop_attrdict = self.get_attr_dict() 552 _write_instance_repr(out, visited, 553 self.safe_tp_name(), pyop_attrdict, self.as_address()) 554 555class ProxyException(Exception): 556 def __init__(self, tp_name, args): 557 self.tp_name = tp_name 558 self.args = args 559 560 def __repr__(self): 561 return '%s%r' % (self.tp_name, self.args) 562 563class PyBaseExceptionObjectPtr(PyObjectPtr): 564 """ 565 Class wrapping a gdb.Value that's a PyBaseExceptionObject* i.e. an exception 566 within the process being debugged. 567 """ 568 _typename = 'PyBaseExceptionObject' 569 570 def proxyval(self, visited): 571 # Guard against infinite loops: 572 if self.as_address() in visited: 573 return ProxyAlreadyVisited('(...)') 574 visited.add(self.as_address()) 575 arg_proxy = self.pyop_field('args').proxyval(visited) 576 return ProxyException(self.safe_tp_name(), 577 arg_proxy) 578 579 def write_repr(self, out, visited): 580 # Guard against infinite loops: 581 if self.as_address() in visited: 582 out.write('(...)') 583 return 584 visited.add(self.as_address()) 585 586 out.write(self.safe_tp_name()) 587 self.write_field_repr('args', out, visited) 588 589class PyClassObjectPtr(PyObjectPtr): 590 """ 591 Class wrapping a gdb.Value that's a PyClassObject* i.e. a <classobj> 592 instance within the process being debugged. 593 """ 594 _typename = 'PyClassObject' 595 596 597class BuiltInFunctionProxy(object): 598 def __init__(self, ml_name): 599 self.ml_name = ml_name 600 601 def __repr__(self): 602 return "<built-in function %s>" % self.ml_name 603 604class BuiltInMethodProxy(object): 605 def __init__(self, ml_name, pyop_m_self): 606 self.ml_name = ml_name 607 self.pyop_m_self = pyop_m_self 608 609 def __repr__(self): 610 return ('<built-in method %s of %s object at remote 0x%x>' 611 % (self.ml_name, 612 self.pyop_m_self.safe_tp_name(), 613 self.pyop_m_self.as_address()) 614 ) 615 616class PyCFunctionObjectPtr(PyObjectPtr): 617 """ 618 Class wrapping a gdb.Value that's a PyCFunctionObject* 619 (see Include/methodobject.h and Objects/methodobject.c) 620 """ 621 _typename = 'PyCFunctionObject' 622 623 def proxyval(self, visited): 624 m_ml = self.field('m_ml') # m_ml is a (PyMethodDef*) 625 try: 626 ml_name = m_ml['ml_name'].string() 627 except UnicodeDecodeError: 628 ml_name = '<ml_name:UnicodeDecodeError>' 629 630 pyop_m_self = self.pyop_field('m_self') 631 if pyop_m_self.is_null(): 632 return BuiltInFunctionProxy(ml_name) 633 else: 634 return BuiltInMethodProxy(ml_name, pyop_m_self) 635 636 637class PyCodeObjectPtr(PyObjectPtr): 638 """ 639 Class wrapping a gdb.Value that's a PyCodeObject* i.e. a <code> instance 640 within the process being debugged. 641 """ 642 _typename = 'PyCodeObject' 643 644 def addr2line(self, addrq): 645 ''' 646 Get the line number for a given bytecode offset 647 648 Analogous to PyCode_Addr2Line; translated from pseudocode in 649 Objects/lnotab_notes.txt 650 ''' 651 co_lnotab = self.pyop_field('co_lnotab').proxyval(set()) 652 653 # Initialize lineno to co_firstlineno as per PyCode_Addr2Line 654 # not 0, as lnotab_notes.txt has it: 655 lineno = int_from_int(self.field('co_firstlineno')) 656 657 addr = 0 658 for addr_incr, line_incr in zip(co_lnotab[::2], co_lnotab[1::2]): 659 addr += ord(addr_incr) 660 if addr > addrq: 661 return lineno 662 lineno += ord(line_incr) 663 return lineno 664 665 666class PyDictObjectPtr(PyObjectPtr): 667 """ 668 Class wrapping a gdb.Value that's a PyDictObject* i.e. a dict instance 669 within the process being debugged. 670 """ 671 _typename = 'PyDictObject' 672 673 def iteritems(self): 674 ''' 675 Yields a sequence of (PyObjectPtr key, PyObjectPtr value) pairs, 676 analogous to dict.iteritems() 677 ''' 678 keys = self.field('ma_keys') 679 values = self.field('ma_values') 680 entries, nentries = self._get_entries(keys) 681 for i in safe_range(nentries): 682 ep = entries[i] 683 if long(values): 684 pyop_value = PyObjectPtr.from_pyobject_ptr(values[i]) 685 else: 686 pyop_value = PyObjectPtr.from_pyobject_ptr(ep['me_value']) 687 if not pyop_value.is_null(): 688 pyop_key = PyObjectPtr.from_pyobject_ptr(ep['me_key']) 689 yield (pyop_key, pyop_value) 690 691 def proxyval(self, visited): 692 # Guard against infinite loops: 693 if self.as_address() in visited: 694 return ProxyAlreadyVisited('{...}') 695 visited.add(self.as_address()) 696 697 result = {} 698 for pyop_key, pyop_value in self.iteritems(): 699 proxy_key = pyop_key.proxyval(visited) 700 proxy_value = pyop_value.proxyval(visited) 701 result[proxy_key] = proxy_value 702 return result 703 704 def write_repr(self, out, visited): 705 # Guard against infinite loops: 706 if self.as_address() in visited: 707 out.write('{...}') 708 return 709 visited.add(self.as_address()) 710 711 out.write('{') 712 first = True 713 for pyop_key, pyop_value in self.iteritems(): 714 if not first: 715 out.write(', ') 716 first = False 717 pyop_key.write_repr(out, visited) 718 out.write(': ') 719 pyop_value.write_repr(out, visited) 720 out.write('}') 721 722 def _get_entries(self, keys): 723 dk_nentries = int(keys['dk_nentries']) 724 dk_size = int(keys['dk_size']) 725 try: 726 # <= Python 3.5 727 return keys['dk_entries'], dk_size 728 except RuntimeError: 729 # >= Python 3.6 730 pass 731 732 if dk_size <= 0xFF: 733 offset = dk_size 734 elif dk_size <= 0xFFFF: 735 offset = 2 * dk_size 736 elif dk_size <= 0xFFFFFFFF: 737 offset = 4 * dk_size 738 else: 739 offset = 8 * dk_size 740 741 ent_addr = keys['dk_indices'].address 742 ent_addr = ent_addr.cast(_type_unsigned_char_ptr()) + offset 743 ent_ptr_t = gdb.lookup_type('PyDictKeyEntry').pointer() 744 ent_addr = ent_addr.cast(ent_ptr_t) 745 746 return ent_addr, dk_nentries 747 748 749class PyListObjectPtr(PyObjectPtr): 750 _typename = 'PyListObject' 751 752 def __getitem__(self, i): 753 # Get the gdb.Value for the (PyObject*) with the given index: 754 field_ob_item = self.field('ob_item') 755 return field_ob_item[i] 756 757 def proxyval(self, visited): 758 # Guard against infinite loops: 759 if self.as_address() in visited: 760 return ProxyAlreadyVisited('[...]') 761 visited.add(self.as_address()) 762 763 result = [PyObjectPtr.from_pyobject_ptr(self[i]).proxyval(visited) 764 for i in safe_range(int_from_int(self.field('ob_size')))] 765 return result 766 767 def write_repr(self, out, visited): 768 # Guard against infinite loops: 769 if self.as_address() in visited: 770 out.write('[...]') 771 return 772 visited.add(self.as_address()) 773 774 out.write('[') 775 for i in safe_range(int_from_int(self.field('ob_size'))): 776 if i > 0: 777 out.write(', ') 778 element = PyObjectPtr.from_pyobject_ptr(self[i]) 779 element.write_repr(out, visited) 780 out.write(']') 781 782class PyLongObjectPtr(PyObjectPtr): 783 _typename = 'PyLongObject' 784 785 def proxyval(self, visited): 786 ''' 787 Python's Include/longobjrep.h has this declaration: 788 struct _longobject { 789 PyObject_VAR_HEAD 790 digit ob_digit[1]; 791 }; 792 793 with this description: 794 The absolute value of a number is equal to 795 SUM(for i=0 through abs(ob_size)-1) ob_digit[i] * 2**(SHIFT*i) 796 Negative numbers are represented with ob_size < 0; 797 zero is represented by ob_size == 0. 798 799 where SHIFT can be either: 800 #define PyLong_SHIFT 30 801 #define PyLong_SHIFT 15 802 ''' 803 ob_size = long(self.field('ob_size')) 804 if ob_size == 0: 805 return 0 806 807 ob_digit = self.field('ob_digit') 808 809 if gdb.lookup_type('digit').sizeof == 2: 810 SHIFT = 15 811 else: 812 SHIFT = 30 813 814 digits = [long(ob_digit[i]) * 2**(SHIFT*i) 815 for i in safe_range(abs(ob_size))] 816 result = sum(digits) 817 if ob_size < 0: 818 result = -result 819 return result 820 821 def write_repr(self, out, visited): 822 # Write this out as a Python 3 int literal, i.e. without the "L" suffix 823 proxy = self.proxyval(visited) 824 out.write("%s" % proxy) 825 826 827class PyBoolObjectPtr(PyLongObjectPtr): 828 """ 829 Class wrapping a gdb.Value that's a PyBoolObject* i.e. one of the two 830 <bool> instances (Py_True/Py_False) within the process being debugged. 831 """ 832 def proxyval(self, visited): 833 if PyLongObjectPtr.proxyval(self, visited): 834 return True 835 else: 836 return False 837 838class PyNoneStructPtr(PyObjectPtr): 839 """ 840 Class wrapping a gdb.Value that's a PyObject* pointing to the 841 singleton (we hope) _Py_NoneStruct with ob_type PyNone_Type 842 """ 843 _typename = 'PyObject' 844 845 def proxyval(self, visited): 846 return None 847 848 849class PyFrameObjectPtr(PyObjectPtr): 850 _typename = 'PyFrameObject' 851 852 def __init__(self, gdbval, cast_to=None): 853 PyObjectPtr.__init__(self, gdbval, cast_to) 854 855 if not self.is_optimized_out(): 856 self.co = PyCodeObjectPtr.from_pyobject_ptr(self.field('f_code')) 857 self.co_name = self.co.pyop_field('co_name') 858 self.co_filename = self.co.pyop_field('co_filename') 859 860 self.f_lineno = int_from_int(self.field('f_lineno')) 861 self.f_lasti = int_from_int(self.field('f_lasti')) 862 self.co_nlocals = int_from_int(self.co.field('co_nlocals')) 863 self.co_varnames = PyTupleObjectPtr.from_pyobject_ptr(self.co.field('co_varnames')) 864 865 def iter_locals(self): 866 ''' 867 Yield a sequence of (name,value) pairs of PyObjectPtr instances, for 868 the local variables of this frame 869 ''' 870 if self.is_optimized_out(): 871 return 872 873 f_localsplus = self.field('f_localsplus') 874 for i in safe_range(self.co_nlocals): 875 pyop_value = PyObjectPtr.from_pyobject_ptr(f_localsplus[i]) 876 if not pyop_value.is_null(): 877 pyop_name = PyObjectPtr.from_pyobject_ptr(self.co_varnames[i]) 878 yield (pyop_name, pyop_value) 879 880 def iter_globals(self): 881 ''' 882 Yield a sequence of (name,value) pairs of PyObjectPtr instances, for 883 the global variables of this frame 884 ''' 885 if self.is_optimized_out(): 886 return () 887 888 pyop_globals = self.pyop_field('f_globals') 889 return pyop_globals.iteritems() 890 891 def iter_builtins(self): 892 ''' 893 Yield a sequence of (name,value) pairs of PyObjectPtr instances, for 894 the builtin variables 895 ''' 896 if self.is_optimized_out(): 897 return () 898 899 pyop_builtins = self.pyop_field('f_builtins') 900 return pyop_builtins.iteritems() 901 902 def get_var_by_name(self, name): 903 ''' 904 Look for the named local variable, returning a (PyObjectPtr, scope) pair 905 where scope is a string 'local', 'global', 'builtin' 906 907 If not found, return (None, None) 908 ''' 909 for pyop_name, pyop_value in self.iter_locals(): 910 if name == pyop_name.proxyval(set()): 911 return pyop_value, 'local' 912 for pyop_name, pyop_value in self.iter_globals(): 913 if name == pyop_name.proxyval(set()): 914 return pyop_value, 'global' 915 for pyop_name, pyop_value in self.iter_builtins(): 916 if name == pyop_name.proxyval(set()): 917 return pyop_value, 'builtin' 918 return None, None 919 920 def filename(self): 921 '''Get the path of the current Python source file, as a string''' 922 if self.is_optimized_out(): 923 return FRAME_INFO_OPTIMIZED_OUT 924 return self.co_filename.proxyval(set()) 925 926 def current_line_num(self): 927 '''Get current line number as an integer (1-based) 928 929 Translated from PyFrame_GetLineNumber and PyCode_Addr2Line 930 931 See Objects/lnotab_notes.txt 932 ''' 933 if self.is_optimized_out(): 934 return None 935 f_trace = self.field('f_trace') 936 if long(f_trace) != 0: 937 # we have a non-NULL f_trace: 938 return self.f_lineno 939 940 try: 941 return self.co.addr2line(self.f_lasti) 942 except Exception: 943 # bpo-34989: addr2line() is a complex function, it can fail in many 944 # ways. For example, it fails with a TypeError on "FakeRepr" if 945 # gdb fails to load debug symbols. Use a catch-all "except 946 # Exception" to make the whole function safe. The caller has to 947 # handle None anyway for optimized Python. 948 return None 949 950 def current_line(self): 951 '''Get the text of the current source line as a string, with a trailing 952 newline character''' 953 if self.is_optimized_out(): 954 return FRAME_INFO_OPTIMIZED_OUT 955 956 lineno = self.current_line_num() 957 if lineno is None: 958 return '(failed to get frame line number)' 959 960 filename = self.filename() 961 try: 962 with open(os_fsencode(filename), 'r') as fp: 963 lines = fp.readlines() 964 except IOError: 965 return None 966 967 try: 968 # Convert from 1-based current_line_num to 0-based list offset 969 return lines[lineno - 1] 970 except IndexError: 971 return None 972 973 def write_repr(self, out, visited): 974 if self.is_optimized_out(): 975 out.write(FRAME_INFO_OPTIMIZED_OUT) 976 return 977 lineno = self.current_line_num() 978 lineno = str(lineno) if lineno is not None else "?" 979 out.write('Frame 0x%x, for file %s, line %s, in %s (' 980 % (self.as_address(), 981 self.co_filename.proxyval(visited), 982 lineno, 983 self.co_name.proxyval(visited))) 984 first = True 985 for pyop_name, pyop_value in self.iter_locals(): 986 if not first: 987 out.write(', ') 988 first = False 989 990 out.write(pyop_name.proxyval(visited)) 991 out.write('=') 992 pyop_value.write_repr(out, visited) 993 994 out.write(')') 995 996 def print_traceback(self): 997 if self.is_optimized_out(): 998 sys.stdout.write(' %s\n' % FRAME_INFO_OPTIMIZED_OUT) 999 return 1000 visited = set() 1001 lineno = self.current_line_num() 1002 lineno = str(lineno) if lineno is not None else "?" 1003 sys.stdout.write(' File "%s", line %s, in %s\n' 1004 % (self.co_filename.proxyval(visited), 1005 lineno, 1006 self.co_name.proxyval(visited))) 1007 1008class PySetObjectPtr(PyObjectPtr): 1009 _typename = 'PySetObject' 1010 1011 @classmethod 1012 def _dummy_key(self): 1013 return gdb.lookup_global_symbol('_PySet_Dummy').value() 1014 1015 def __iter__(self): 1016 dummy_ptr = self._dummy_key() 1017 table = self.field('table') 1018 for i in safe_range(self.field('mask') + 1): 1019 setentry = table[i] 1020 key = setentry['key'] 1021 if key != 0 and key != dummy_ptr: 1022 yield PyObjectPtr.from_pyobject_ptr(key) 1023 1024 def proxyval(self, visited): 1025 # Guard against infinite loops: 1026 if self.as_address() in visited: 1027 return ProxyAlreadyVisited('%s(...)' % self.safe_tp_name()) 1028 visited.add(self.as_address()) 1029 1030 members = (key.proxyval(visited) for key in self) 1031 if self.safe_tp_name() == 'frozenset': 1032 return frozenset(members) 1033 else: 1034 return set(members) 1035 1036 def write_repr(self, out, visited): 1037 # Emulate Python 3's set_repr 1038 tp_name = self.safe_tp_name() 1039 1040 # Guard against infinite loops: 1041 if self.as_address() in visited: 1042 out.write('(...)') 1043 return 1044 visited.add(self.as_address()) 1045 1046 # Python 3's set_repr special-cases the empty set: 1047 if not self.field('used'): 1048 out.write(tp_name) 1049 out.write('()') 1050 return 1051 1052 # Python 3 uses {} for set literals: 1053 if tp_name != 'set': 1054 out.write(tp_name) 1055 out.write('(') 1056 1057 out.write('{') 1058 first = True 1059 for key in self: 1060 if not first: 1061 out.write(', ') 1062 first = False 1063 key.write_repr(out, visited) 1064 out.write('}') 1065 1066 if tp_name != 'set': 1067 out.write(')') 1068 1069 1070class PyBytesObjectPtr(PyObjectPtr): 1071 _typename = 'PyBytesObject' 1072 1073 def __str__(self): 1074 field_ob_size = self.field('ob_size') 1075 field_ob_sval = self.field('ob_sval') 1076 char_ptr = field_ob_sval.address.cast(_type_unsigned_char_ptr()) 1077 return ''.join([chr(char_ptr[i]) for i in safe_range(field_ob_size)]) 1078 1079 def proxyval(self, visited): 1080 return str(self) 1081 1082 def write_repr(self, out, visited): 1083 # Write this out as a Python 3 bytes literal, i.e. with a "b" prefix 1084 1085 # Get a PyStringObject* within the Python 2 gdb process: 1086 proxy = self.proxyval(visited) 1087 1088 # Transliteration of Python 3's Objects/bytesobject.c:PyBytes_Repr 1089 # to Python 2 code: 1090 quote = "'" 1091 if "'" in proxy and not '"' in proxy: 1092 quote = '"' 1093 out.write('b') 1094 out.write(quote) 1095 for byte in proxy: 1096 if byte == quote or byte == '\\': 1097 out.write('\\') 1098 out.write(byte) 1099 elif byte == '\t': 1100 out.write('\\t') 1101 elif byte == '\n': 1102 out.write('\\n') 1103 elif byte == '\r': 1104 out.write('\\r') 1105 elif byte < ' ' or ord(byte) >= 0x7f: 1106 out.write('\\x') 1107 out.write(hexdigits[(ord(byte) & 0xf0) >> 4]) 1108 out.write(hexdigits[ord(byte) & 0xf]) 1109 else: 1110 out.write(byte) 1111 out.write(quote) 1112 1113class PyTupleObjectPtr(PyObjectPtr): 1114 _typename = 'PyTupleObject' 1115 1116 def __getitem__(self, i): 1117 # Get the gdb.Value for the (PyObject*) with the given index: 1118 field_ob_item = self.field('ob_item') 1119 return field_ob_item[i] 1120 1121 def proxyval(self, visited): 1122 # Guard against infinite loops: 1123 if self.as_address() in visited: 1124 return ProxyAlreadyVisited('(...)') 1125 visited.add(self.as_address()) 1126 1127 result = tuple(PyObjectPtr.from_pyobject_ptr(self[i]).proxyval(visited) 1128 for i in safe_range(int_from_int(self.field('ob_size')))) 1129 return result 1130 1131 def write_repr(self, out, visited): 1132 # Guard against infinite loops: 1133 if self.as_address() in visited: 1134 out.write('(...)') 1135 return 1136 visited.add(self.as_address()) 1137 1138 out.write('(') 1139 for i in safe_range(int_from_int(self.field('ob_size'))): 1140 if i > 0: 1141 out.write(', ') 1142 element = PyObjectPtr.from_pyobject_ptr(self[i]) 1143 element.write_repr(out, visited) 1144 if self.field('ob_size') == 1: 1145 out.write(',)') 1146 else: 1147 out.write(')') 1148 1149class PyTypeObjectPtr(PyObjectPtr): 1150 _typename = 'PyTypeObject' 1151 1152 1153def _unichr_is_printable(char): 1154 # Logic adapted from Python 3's Tools/unicode/makeunicodedata.py 1155 if char == u" ": 1156 return True 1157 import unicodedata 1158 return unicodedata.category(char) not in ("C", "Z") 1159 1160if sys.maxunicode >= 0x10000: 1161 _unichr = unichr 1162else: 1163 # Needed for proper surrogate support if sizeof(Py_UNICODE) is 2 in gdb 1164 def _unichr(x): 1165 if x < 0x10000: 1166 return unichr(x) 1167 x -= 0x10000 1168 ch1 = 0xD800 | (x >> 10) 1169 ch2 = 0xDC00 | (x & 0x3FF) 1170 return unichr(ch1) + unichr(ch2) 1171 1172 1173class PyUnicodeObjectPtr(PyObjectPtr): 1174 _typename = 'PyUnicodeObject' 1175 1176 def char_width(self): 1177 _type_Py_UNICODE = gdb.lookup_type('Py_UNICODE') 1178 return _type_Py_UNICODE.sizeof 1179 1180 def proxyval(self, visited): 1181 global _is_pep393 1182 if _is_pep393 is None: 1183 fields = gdb.lookup_type('PyUnicodeObject').fields() 1184 _is_pep393 = 'data' in [f.name for f in fields] 1185 if _is_pep393: 1186 # Python 3.3 and newer 1187 may_have_surrogates = False 1188 compact = self.field('_base') 1189 ascii = compact['_base'] 1190 state = ascii['state'] 1191 is_compact_ascii = (int(state['ascii']) and int(state['compact'])) 1192 if not int(state['ready']): 1193 # string is not ready 1194 field_length = long(compact['wstr_length']) 1195 may_have_surrogates = True 1196 field_str = ascii['wstr'] 1197 else: 1198 field_length = long(ascii['length']) 1199 if is_compact_ascii: 1200 field_str = ascii.address + 1 1201 elif int(state['compact']): 1202 field_str = compact.address + 1 1203 else: 1204 field_str = self.field('data')['any'] 1205 repr_kind = int(state['kind']) 1206 if repr_kind == 1: 1207 field_str = field_str.cast(_type_unsigned_char_ptr()) 1208 elif repr_kind == 2: 1209 field_str = field_str.cast(_type_unsigned_short_ptr()) 1210 elif repr_kind == 4: 1211 field_str = field_str.cast(_type_unsigned_int_ptr()) 1212 else: 1213 # Python 3.2 and earlier 1214 field_length = long(self.field('length')) 1215 field_str = self.field('str') 1216 may_have_surrogates = self.char_width() == 2 1217 1218 # Gather a list of ints from the Py_UNICODE array; these are either 1219 # UCS-1, UCS-2 or UCS-4 code points: 1220 if not may_have_surrogates: 1221 Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)] 1222 else: 1223 # A more elaborate routine if sizeof(Py_UNICODE) is 2 in the 1224 # inferior process: we must join surrogate pairs. 1225 Py_UNICODEs = [] 1226 i = 0 1227 limit = safety_limit(field_length) 1228 while i < limit: 1229 ucs = int(field_str[i]) 1230 i += 1 1231 if ucs < 0xD800 or ucs >= 0xDC00 or i == field_length: 1232 Py_UNICODEs.append(ucs) 1233 continue 1234 # This could be a surrogate pair. 1235 ucs2 = int(field_str[i]) 1236 if ucs2 < 0xDC00 or ucs2 > 0xDFFF: 1237 continue 1238 code = (ucs & 0x03FF) << 10 1239 code |= ucs2 & 0x03FF 1240 code += 0x00010000 1241 Py_UNICODEs.append(code) 1242 i += 1 1243 1244 # Convert the int code points to unicode characters, and generate a 1245 # local unicode instance. 1246 # This splits surrogate pairs if sizeof(Py_UNICODE) is 2 here (in gdb). 1247 result = u''.join([ 1248 (_unichr(ucs) if ucs <= 0x10ffff else '\ufffd') 1249 for ucs in Py_UNICODEs]) 1250 return result 1251 1252 def write_repr(self, out, visited): 1253 # Write this out as a Python 3 str literal, i.e. without a "u" prefix 1254 1255 # Get a PyUnicodeObject* within the Python 2 gdb process: 1256 proxy = self.proxyval(visited) 1257 1258 # Transliteration of Python 3's Object/unicodeobject.c:unicode_repr 1259 # to Python 2: 1260 if "'" in proxy and '"' not in proxy: 1261 quote = '"' 1262 else: 1263 quote = "'" 1264 out.write(quote) 1265 1266 i = 0 1267 while i < len(proxy): 1268 ch = proxy[i] 1269 i += 1 1270 1271 # Escape quotes and backslashes 1272 if ch == quote or ch == '\\': 1273 out.write('\\') 1274 out.write(ch) 1275 1276 # Map special whitespace to '\t', \n', '\r' 1277 elif ch == '\t': 1278 out.write('\\t') 1279 elif ch == '\n': 1280 out.write('\\n') 1281 elif ch == '\r': 1282 out.write('\\r') 1283 1284 # Map non-printable US ASCII to '\xhh' */ 1285 elif ch < ' ' or ch == 0x7F: 1286 out.write('\\x') 1287 out.write(hexdigits[(ord(ch) >> 4) & 0x000F]) 1288 out.write(hexdigits[ord(ch) & 0x000F]) 1289 1290 # Copy ASCII characters as-is 1291 elif ord(ch) < 0x7F: 1292 out.write(ch) 1293 1294 # Non-ASCII characters 1295 else: 1296 ucs = ch 1297 ch2 = None 1298 if sys.maxunicode < 0x10000: 1299 # If sizeof(Py_UNICODE) is 2 here (in gdb), join 1300 # surrogate pairs before calling _unichr_is_printable. 1301 if (i < len(proxy) 1302 and 0xD800 <= ord(ch) < 0xDC00 \ 1303 and 0xDC00 <= ord(proxy[i]) <= 0xDFFF): 1304 ch2 = proxy[i] 1305 ucs = ch + ch2 1306 i += 1 1307 1308 # Unfortuately, Python 2's unicode type doesn't seem 1309 # to expose the "isprintable" method 1310 printable = _unichr_is_printable(ucs) 1311 if printable: 1312 try: 1313 ucs.encode(ENCODING) 1314 except UnicodeEncodeError: 1315 printable = False 1316 1317 # Map Unicode whitespace and control characters 1318 # (categories Z* and C* except ASCII space) 1319 if not printable: 1320 if ch2 is not None: 1321 # Match Python 3's representation of non-printable 1322 # wide characters. 1323 code = (ord(ch) & 0x03FF) << 10 1324 code |= ord(ch2) & 0x03FF 1325 code += 0x00010000 1326 else: 1327 code = ord(ucs) 1328 1329 # Map 8-bit characters to '\\xhh' 1330 if code <= 0xff: 1331 out.write('\\x') 1332 out.write(hexdigits[(code >> 4) & 0x000F]) 1333 out.write(hexdigits[code & 0x000F]) 1334 # Map 21-bit characters to '\U00xxxxxx' 1335 elif code >= 0x10000: 1336 out.write('\\U') 1337 out.write(hexdigits[(code >> 28) & 0x0000000F]) 1338 out.write(hexdigits[(code >> 24) & 0x0000000F]) 1339 out.write(hexdigits[(code >> 20) & 0x0000000F]) 1340 out.write(hexdigits[(code >> 16) & 0x0000000F]) 1341 out.write(hexdigits[(code >> 12) & 0x0000000F]) 1342 out.write(hexdigits[(code >> 8) & 0x0000000F]) 1343 out.write(hexdigits[(code >> 4) & 0x0000000F]) 1344 out.write(hexdigits[code & 0x0000000F]) 1345 # Map 16-bit characters to '\uxxxx' 1346 else: 1347 out.write('\\u') 1348 out.write(hexdigits[(code >> 12) & 0x000F]) 1349 out.write(hexdigits[(code >> 8) & 0x000F]) 1350 out.write(hexdigits[(code >> 4) & 0x000F]) 1351 out.write(hexdigits[code & 0x000F]) 1352 else: 1353 # Copy characters as-is 1354 out.write(ch) 1355 if ch2 is not None: 1356 out.write(ch2) 1357 1358 out.write(quote) 1359 1360 1361class wrapperobject(PyObjectPtr): 1362 _typename = 'wrapperobject' 1363 1364 def safe_name(self): 1365 try: 1366 name = self.field('descr')['d_base']['name'].string() 1367 return repr(name) 1368 except (NullPyObjectPtr, RuntimeError, UnicodeDecodeError): 1369 return '<unknown name>' 1370 1371 def safe_tp_name(self): 1372 try: 1373 return self.field('self')['ob_type']['tp_name'].string() 1374 except (NullPyObjectPtr, RuntimeError, UnicodeDecodeError): 1375 return '<unknown tp_name>' 1376 1377 def safe_self_addresss(self): 1378 try: 1379 address = long(self.field('self')) 1380 return '%#x' % address 1381 except (NullPyObjectPtr, RuntimeError): 1382 return '<failed to get self address>' 1383 1384 def proxyval(self, visited): 1385 name = self.safe_name() 1386 tp_name = self.safe_tp_name() 1387 self_address = self.safe_self_addresss() 1388 return ("<method-wrapper %s of %s object at %s>" 1389 % (name, tp_name, self_address)) 1390 1391 def write_repr(self, out, visited): 1392 proxy = self.proxyval(visited) 1393 out.write(proxy) 1394 1395 1396def int_from_int(gdbval): 1397 return int(gdbval) 1398 1399 1400def stringify(val): 1401 # TODO: repr() puts everything on one line; pformat can be nicer, but 1402 # can lead to v.long results; this function isolates the choice 1403 if True: 1404 return repr(val) 1405 else: 1406 from pprint import pformat 1407 return pformat(val) 1408 1409 1410class PyObjectPtrPrinter: 1411 "Prints a (PyObject*)" 1412 1413 def __init__ (self, gdbval): 1414 self.gdbval = gdbval 1415 1416 def to_string (self): 1417 pyop = PyObjectPtr.from_pyobject_ptr(self.gdbval) 1418 if True: 1419 return pyop.get_truncated_repr(MAX_OUTPUT_LEN) 1420 else: 1421 # Generate full proxy value then stringify it. 1422 # Doing so could be expensive 1423 proxyval = pyop.proxyval(set()) 1424 return stringify(proxyval) 1425 1426def pretty_printer_lookup(gdbval): 1427 type = gdbval.type.unqualified() 1428 if type.code != gdb.TYPE_CODE_PTR: 1429 return None 1430 1431 type = type.target().unqualified() 1432 t = str(type) 1433 if t in ("PyObject", "PyFrameObject", "PyUnicodeObject", "wrapperobject"): 1434 return PyObjectPtrPrinter(gdbval) 1435 1436""" 1437During development, I've been manually invoking the code in this way: 1438(gdb) python 1439 1440import sys 1441sys.path.append('/home/david/coding/python-gdb') 1442import libpython 1443end 1444 1445then reloading it after each edit like this: 1446(gdb) python reload(libpython) 1447 1448The following code should ensure that the prettyprinter is registered 1449if the code is autoloaded by gdb when visiting libpython.so, provided 1450that this python file is installed to the same path as the library (or its 1451.debug file) plus a "-gdb.py" suffix, e.g: 1452 /usr/lib/libpython2.6.so.1.0-gdb.py 1453 /usr/lib/debug/usr/lib/libpython2.6.so.1.0.debug-gdb.py 1454""" 1455def register (obj): 1456 if obj is None: 1457 obj = gdb 1458 1459 # Wire up the pretty-printer 1460 obj.pretty_printers.append(pretty_printer_lookup) 1461 1462register (gdb.current_objfile ()) 1463 1464 1465 1466# Unfortunately, the exact API exposed by the gdb module varies somewhat 1467# from build to build 1468# See http://bugs.python.org/issue8279?#msg102276 1469 1470class Frame(object): 1471 ''' 1472 Wrapper for gdb.Frame, adding various methods 1473 ''' 1474 def __init__(self, gdbframe): 1475 self._gdbframe = gdbframe 1476 1477 def older(self): 1478 older = self._gdbframe.older() 1479 if older: 1480 return Frame(older) 1481 else: 1482 return None 1483 1484 def newer(self): 1485 newer = self._gdbframe.newer() 1486 if newer: 1487 return Frame(newer) 1488 else: 1489 return None 1490 1491 def select(self): 1492 '''If supported, select this frame and return True; return False if unsupported 1493 1494 Not all builds have a gdb.Frame.select method; seems to be present on Fedora 12 1495 onwards, but absent on Ubuntu buildbot''' 1496 if not hasattr(self._gdbframe, 'select'): 1497 print ('Unable to select frame: ' 1498 'this build of gdb does not expose a gdb.Frame.select method') 1499 return False 1500 self._gdbframe.select() 1501 return True 1502 1503 def get_index(self): 1504 '''Calculate index of frame, starting at 0 for the newest frame within 1505 this thread''' 1506 index = 0 1507 # Go down until you reach the newest frame: 1508 iter_frame = self 1509 while iter_frame.newer(): 1510 index += 1 1511 iter_frame = iter_frame.newer() 1512 return index 1513 1514 # We divide frames into: 1515 # - "python frames": 1516 # - "bytecode frames" i.e. PyEval_EvalFrameEx 1517 # - "other python frames": things that are of interest from a python 1518 # POV, but aren't bytecode (e.g. GC, GIL) 1519 # - everything else 1520 1521 def is_python_frame(self): 1522 '''Is this a _PyEval_EvalFrameDefault frame, or some other important 1523 frame? (see is_other_python_frame for what "important" means in this 1524 context)''' 1525 if self.is_evalframe(): 1526 return True 1527 if self.is_other_python_frame(): 1528 return True 1529 return False 1530 1531 def is_evalframe(self): 1532 '''Is this a _PyEval_EvalFrameDefault frame?''' 1533 if self._gdbframe.name() == EVALFRAME: 1534 ''' 1535 I believe we also need to filter on the inline 1536 struct frame_id.inline_depth, only regarding frames with 1537 an inline depth of 0 as actually being this function 1538 1539 So we reject those with type gdb.INLINE_FRAME 1540 ''' 1541 if self._gdbframe.type() == gdb.NORMAL_FRAME: 1542 # We have a _PyEval_EvalFrameDefault frame: 1543 return True 1544 1545 return False 1546 1547 def is_other_python_frame(self): 1548 '''Is this frame worth displaying in python backtraces? 1549 Examples: 1550 - waiting on the GIL 1551 - garbage-collecting 1552 - within a CFunction 1553 If it is, return a descriptive string 1554 For other frames, return False 1555 ''' 1556 if self.is_waiting_for_gil(): 1557 return 'Waiting for the GIL' 1558 1559 if self.is_gc_collect(): 1560 return 'Garbage-collecting' 1561 1562 # Detect invocations of PyCFunction instances: 1563 frame = self._gdbframe 1564 caller = frame.name() 1565 if not caller: 1566 return False 1567 1568 if (caller.startswith('cfunction_vectorcall_') or 1569 caller == 'cfunction_call'): 1570 arg_name = 'func' 1571 # Within that frame: 1572 # "func" is the local containing the PyObject* of the 1573 # PyCFunctionObject instance 1574 # "f" is the same value, but cast to (PyCFunctionObject*) 1575 # "self" is the (PyObject*) of the 'self' 1576 try: 1577 # Use the prettyprinter for the func: 1578 func = frame.read_var(arg_name) 1579 return str(func) 1580 except ValueError: 1581 return ('PyCFunction invocation (unable to read %s: ' 1582 'missing debuginfos?)' % arg_name) 1583 except RuntimeError: 1584 return 'PyCFunction invocation (unable to read %s)' % arg_name 1585 1586 if caller == 'wrapper_call': 1587 arg_name = 'wp' 1588 try: 1589 func = frame.read_var(arg_name) 1590 return str(func) 1591 except ValueError: 1592 return ('<wrapper_call invocation (unable to read %s: ' 1593 'missing debuginfos?)>' % arg_name) 1594 except RuntimeError: 1595 return '<wrapper_call invocation (unable to read %s)>' % arg_name 1596 1597 # This frame isn't worth reporting: 1598 return False 1599 1600 def is_waiting_for_gil(self): 1601 '''Is this frame waiting on the GIL?''' 1602 # This assumes the _POSIX_THREADS version of Python/ceval_gil.h: 1603 name = self._gdbframe.name() 1604 if name: 1605 return (name == 'take_gil') 1606 1607 def is_gc_collect(self): 1608 '''Is this frame "collect" within the garbage-collector?''' 1609 return self._gdbframe.name() == 'collect' 1610 1611 def get_pyop(self): 1612 try: 1613 f = self._gdbframe.read_var('f') 1614 frame = PyFrameObjectPtr.from_pyobject_ptr(f) 1615 if not frame.is_optimized_out(): 1616 return frame 1617 # gdb is unable to get the "f" argument of PyEval_EvalFrameEx() 1618 # because it was "optimized out". Try to get "f" from the frame 1619 # of the caller, PyEval_EvalCodeEx(). 1620 orig_frame = frame 1621 caller = self._gdbframe.older() 1622 if caller: 1623 f = caller.read_var('f') 1624 frame = PyFrameObjectPtr.from_pyobject_ptr(f) 1625 if not frame.is_optimized_out(): 1626 return frame 1627 return orig_frame 1628 except ValueError: 1629 return None 1630 1631 @classmethod 1632 def get_selected_frame(cls): 1633 _gdbframe = gdb.selected_frame() 1634 if _gdbframe: 1635 return Frame(_gdbframe) 1636 return None 1637 1638 @classmethod 1639 def get_selected_python_frame(cls): 1640 '''Try to obtain the Frame for the python-related code in the selected 1641 frame, or None''' 1642 try: 1643 frame = cls.get_selected_frame() 1644 except gdb.error: 1645 # No frame: Python didn't start yet 1646 return None 1647 1648 while frame: 1649 if frame.is_python_frame(): 1650 return frame 1651 frame = frame.older() 1652 1653 # Not found: 1654 return None 1655 1656 @classmethod 1657 def get_selected_bytecode_frame(cls): 1658 '''Try to obtain the Frame for the python bytecode interpreter in the 1659 selected GDB frame, or None''' 1660 frame = cls.get_selected_frame() 1661 1662 while frame: 1663 if frame.is_evalframe(): 1664 return frame 1665 frame = frame.older() 1666 1667 # Not found: 1668 return None 1669 1670 def print_summary(self): 1671 if self.is_evalframe(): 1672 pyop = self.get_pyop() 1673 if pyop: 1674 line = pyop.get_truncated_repr(MAX_OUTPUT_LEN) 1675 write_unicode(sys.stdout, '#%i %s\n' % (self.get_index(), line)) 1676 if not pyop.is_optimized_out(): 1677 line = pyop.current_line() 1678 if line is not None: 1679 sys.stdout.write(' %s\n' % line.strip()) 1680 else: 1681 sys.stdout.write('#%i (unable to read python frame information)\n' % self.get_index()) 1682 else: 1683 info = self.is_other_python_frame() 1684 if info: 1685 sys.stdout.write('#%i %s\n' % (self.get_index(), info)) 1686 else: 1687 sys.stdout.write('#%i\n' % self.get_index()) 1688 1689 def print_traceback(self): 1690 if self.is_evalframe(): 1691 pyop = self.get_pyop() 1692 if pyop: 1693 pyop.print_traceback() 1694 if not pyop.is_optimized_out(): 1695 line = pyop.current_line() 1696 if line is not None: 1697 sys.stdout.write(' %s\n' % line.strip()) 1698 else: 1699 sys.stdout.write(' (unable to read python frame information)\n') 1700 else: 1701 info = self.is_other_python_frame() 1702 if info: 1703 sys.stdout.write(' %s\n' % info) 1704 else: 1705 sys.stdout.write(' (not a python frame)\n') 1706 1707class PyList(gdb.Command): 1708 '''List the current Python source code, if any 1709 1710 Use 1711 py-list START 1712 to list at a different line number within the python source. 1713 1714 Use 1715 py-list START, END 1716 to list a specific range of lines within the python source. 1717 ''' 1718 1719 def __init__(self): 1720 gdb.Command.__init__ (self, 1721 "py-list", 1722 gdb.COMMAND_FILES, 1723 gdb.COMPLETE_NONE) 1724 1725 1726 def invoke(self, args, from_tty): 1727 import re 1728 1729 start = None 1730 end = None 1731 1732 m = re.match(r'\s*(\d+)\s*', args) 1733 if m: 1734 start = int(m.group(0)) 1735 end = start + 10 1736 1737 m = re.match(r'\s*(\d+)\s*,\s*(\d+)\s*', args) 1738 if m: 1739 start, end = map(int, m.groups()) 1740 1741 # py-list requires an actual PyEval_EvalFrameEx frame: 1742 frame = Frame.get_selected_bytecode_frame() 1743 if not frame: 1744 print('Unable to locate gdb frame for python bytecode interpreter') 1745 return 1746 1747 pyop = frame.get_pyop() 1748 if not pyop or pyop.is_optimized_out(): 1749 print(UNABLE_READ_INFO_PYTHON_FRAME) 1750 return 1751 1752 filename = pyop.filename() 1753 lineno = pyop.current_line_num() 1754 if lineno is None: 1755 print('Unable to read python frame line number') 1756 return 1757 1758 if start is None: 1759 start = lineno - 5 1760 end = lineno + 5 1761 1762 if start<1: 1763 start = 1 1764 1765 try: 1766 f = open(os_fsencode(filename), 'r') 1767 except IOError as err: 1768 sys.stdout.write('Unable to open %s: %s\n' 1769 % (filename, err)) 1770 return 1771 with f: 1772 all_lines = f.readlines() 1773 # start and end are 1-based, all_lines is 0-based; 1774 # so [start-1:end] as a python slice gives us [start, end] as a 1775 # closed interval 1776 for i, line in enumerate(all_lines[start-1:end]): 1777 linestr = str(i+start) 1778 # Highlight current line: 1779 if i + start == lineno: 1780 linestr = '>' + linestr 1781 sys.stdout.write('%4s %s' % (linestr, line)) 1782 1783 1784# ...and register the command: 1785PyList() 1786 1787def move_in_stack(move_up): 1788 '''Move up or down the stack (for the py-up/py-down command)''' 1789 frame = Frame.get_selected_python_frame() 1790 if not frame: 1791 print('Unable to locate python frame') 1792 return 1793 1794 while frame: 1795 if move_up: 1796 iter_frame = frame.older() 1797 else: 1798 iter_frame = frame.newer() 1799 1800 if not iter_frame: 1801 break 1802 1803 if iter_frame.is_python_frame(): 1804 # Result: 1805 if iter_frame.select(): 1806 iter_frame.print_summary() 1807 return 1808 1809 frame = iter_frame 1810 1811 if move_up: 1812 print('Unable to find an older python frame') 1813 else: 1814 print('Unable to find a newer python frame') 1815 1816class PyUp(gdb.Command): 1817 'Select and print the python stack frame that called this one (if any)' 1818 def __init__(self): 1819 gdb.Command.__init__ (self, 1820 "py-up", 1821 gdb.COMMAND_STACK, 1822 gdb.COMPLETE_NONE) 1823 1824 1825 def invoke(self, args, from_tty): 1826 move_in_stack(move_up=True) 1827 1828class PyDown(gdb.Command): 1829 'Select and print the python stack frame called by this one (if any)' 1830 def __init__(self): 1831 gdb.Command.__init__ (self, 1832 "py-down", 1833 gdb.COMMAND_STACK, 1834 gdb.COMPLETE_NONE) 1835 1836 1837 def invoke(self, args, from_tty): 1838 move_in_stack(move_up=False) 1839 1840# Not all builds of gdb have gdb.Frame.select 1841if hasattr(gdb.Frame, 'select'): 1842 PyUp() 1843 PyDown() 1844 1845class PyBacktraceFull(gdb.Command): 1846 'Display the current python frame and all the frames within its call stack (if any)' 1847 def __init__(self): 1848 gdb.Command.__init__ (self, 1849 "py-bt-full", 1850 gdb.COMMAND_STACK, 1851 gdb.COMPLETE_NONE) 1852 1853 1854 def invoke(self, args, from_tty): 1855 frame = Frame.get_selected_python_frame() 1856 if not frame: 1857 print('Unable to locate python frame') 1858 return 1859 1860 while frame: 1861 if frame.is_python_frame(): 1862 frame.print_summary() 1863 frame = frame.older() 1864 1865PyBacktraceFull() 1866 1867class PyBacktrace(gdb.Command): 1868 'Display the current python frame and all the frames within its call stack (if any)' 1869 def __init__(self): 1870 gdb.Command.__init__ (self, 1871 "py-bt", 1872 gdb.COMMAND_STACK, 1873 gdb.COMPLETE_NONE) 1874 1875 1876 def invoke(self, args, from_tty): 1877 frame = Frame.get_selected_python_frame() 1878 if not frame: 1879 print('Unable to locate python frame') 1880 return 1881 1882 sys.stdout.write('Traceback (most recent call first):\n') 1883 while frame: 1884 if frame.is_python_frame(): 1885 frame.print_traceback() 1886 frame = frame.older() 1887 1888PyBacktrace() 1889 1890class PyPrint(gdb.Command): 1891 'Look up the given python variable name, and print it' 1892 def __init__(self): 1893 gdb.Command.__init__ (self, 1894 "py-print", 1895 gdb.COMMAND_DATA, 1896 gdb.COMPLETE_NONE) 1897 1898 1899 def invoke(self, args, from_tty): 1900 name = str(args) 1901 1902 frame = Frame.get_selected_python_frame() 1903 if not frame: 1904 print('Unable to locate python frame') 1905 return 1906 1907 pyop_frame = frame.get_pyop() 1908 if not pyop_frame: 1909 print(UNABLE_READ_INFO_PYTHON_FRAME) 1910 return 1911 1912 pyop_var, scope = pyop_frame.get_var_by_name(name) 1913 1914 if pyop_var: 1915 print('%s %r = %s' 1916 % (scope, 1917 name, 1918 pyop_var.get_truncated_repr(MAX_OUTPUT_LEN))) 1919 else: 1920 print('%r not found' % name) 1921 1922PyPrint() 1923 1924class PyLocals(gdb.Command): 1925 'Look up the given python variable name, and print it' 1926 def __init__(self): 1927 gdb.Command.__init__ (self, 1928 "py-locals", 1929 gdb.COMMAND_DATA, 1930 gdb.COMPLETE_NONE) 1931 1932 1933 def invoke(self, args, from_tty): 1934 name = str(args) 1935 1936 frame = Frame.get_selected_python_frame() 1937 if not frame: 1938 print('Unable to locate python frame') 1939 return 1940 1941 pyop_frame = frame.get_pyop() 1942 if not pyop_frame: 1943 print(UNABLE_READ_INFO_PYTHON_FRAME) 1944 return 1945 1946 for pyop_name, pyop_value in pyop_frame.iter_locals(): 1947 print('%s = %s' 1948 % (pyop_name.proxyval(set()), 1949 pyop_value.get_truncated_repr(MAX_OUTPUT_LEN))) 1950 1951PyLocals() 1952