• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/python
2'''
3From gdb 7 onwards, gdb's build can be configured --with-python, allowing gdb
4to be extended with Python code e.g. for library-specific data visualizations,
5such as for the C++ STL types.  Documentation on this API can be seen at:
6http://sourceware.org/gdb/current/onlinedocs/gdb/Python-API.html
7
8
9This python module deals with the case when the process being debugged (the
10"inferior process" in gdb parlance) is itself python, or more specifically,
11linked against libpython.  In this situation, almost every item of data is a
12(PyObject*), and having the debugger merely print their addresses is not very
13enlightening.
14
15This module embeds knowledge about the implementation details of libpython so
16that we can emit useful visualizations e.g. a string, a list, a dict, a frame
17giving file/line information and the state of local variables
18
19In particular, given a gdb.Value corresponding to a PyObject* in the inferior
20process, we can generate a "proxy value" within the gdb process.  For example,
21given a PyObject* in the inferior process that is in fact a PyListObject*
22holding three PyObject* that turn out to be PyBytesObject* instances, we can
23generate a proxy value within the gdb process that is a list of bytes
24instances:
25  [b"foo", b"bar", b"baz"]
26
27Doing so can be expensive for complicated graphs of objects, and could take
28some time, so we also have a "write_repr" method that writes a representation
29of the data to a file-like object.  This allows us to stop the traversal by
30having the file-like object raise an exception if it gets too much data.
31
32With both "proxyval" and "write_repr" we keep track of the set of all addresses
33visited so far in the traversal, to avoid infinite recursion due to cycles in
34the graph of object references.
35
36We try to defer gdb.lookup_type() invocations for python types until as late as
37possible: for a dynamically linked python binary, when the process starts in
38the debugger, the libpython.so hasn't been dynamically loaded yet, so none of
39the type names are known to the debugger
40
41The module also extends gdb with some python-specific commands.
42'''
43
44# NOTE: some gdbs are linked with Python 3, so this file should be dual-syntax
45# compatible (2.6+ and 3.0+).  See #19308.
46
47from __future__ import print_function
48import gdb
49import os
50import locale
51import sys
52
# Under Python 3 the Python 2 builtins below no longer exist; alias them to
# their Python 3 equivalents so the rest of the module can use one spelling.
if sys.version_info[0] >= 3:
    unichr = chr
    xrange = range
    long = int
57
58# Look up the gdb.Type for some standard types:
59# Those need to be refreshed as types (pointer sizes) may change when
60# gdb loads different executables
61
def _type_char_ptr():
    """Look up gdb's 'char' type and return the pointer type to it (char*)."""
    char_type = gdb.lookup_type('char')
    return char_type.pointer()
64
65
def _type_unsigned_char_ptr():
    """Look up gdb's 'unsigned char' type and return the pointer type to it
    (unsigned char*)."""
    uchar_type = gdb.lookup_type('unsigned char')
    return uchar_type.pointer()
68
69
def _type_unsigned_short_ptr():
    """Look up gdb's 'unsigned short' type and return the pointer type to it
    (unsigned short*)."""
    ushort_type = gdb.lookup_type('unsigned short')
    return ushort_type.pointer()
72
73
def _type_unsigned_int_ptr():
    """Look up gdb's 'unsigned int' type and return the pointer type to it
    (unsigned int*)."""
    uint_type = gdb.lookup_type('unsigned int')
    return uint_type.pointer()
76
77
def _sizeof_void_p():
    """Return the size reported by gdb for the void* type."""
    void_ptr_type = gdb.lookup_type('void').pointer()
    return void_ptr_type.sizeof
80
81
# value computed later, see PyUnicodeObjectPtr.proxy()
_is_pep393 = None

# Bit masks that PyObjectPtr.subclass_from_type() tests against a type
# object's tp_flags field to pick the wrapper class for an object.
Py_TPFLAGS_HEAPTYPE = (1 << 9)
Py_TPFLAGS_LONG_SUBCLASS     = (1 << 24)
Py_TPFLAGS_LIST_SUBCLASS     = (1 << 25)
Py_TPFLAGS_TUPLE_SUBCLASS    = (1 << 26)
Py_TPFLAGS_BYTES_SUBCLASS    = (1 << 27)
Py_TPFLAGS_UNICODE_SUBCLASS  = (1 << 28)
Py_TPFLAGS_DICT_SUBCLASS     = (1 << 29)
Py_TPFLAGS_BASE_EXC_SUBCLASS = (1 << 30)
Py_TPFLAGS_TYPE_SUBCLASS     = (1 << 31)


# NOTE(review): presumably the default cap passed to get_truncated_repr();
# the users of this constant are outside the visible part of the file.
MAX_OUTPUT_LEN=1024

hexdigits = "0123456789abcdef"

# Encoding used by the Python 2 flavour of write_unicode() when it has to
# turn unicode text into bytes.
ENCODING = locale.getpreferredencoding()

FRAME_INFO_OPTIMIZED_OUT = '(frame information optimized out)'
UNABLE_READ_INFO_PYTHON_FRAME = 'Unable to read information on python frame'
EVALFRAME = '_PyEval_EvalFrameDefault'
105
class NullPyObjectPtr(RuntimeError):
    """Raised when a field is requested through a NULL PyObject pointer."""
108
109
def safety_limit(val):
    """Clamp a value read from the debugged process to at most 1000.

    Data in the inferior may be arbitrarily corrupt, so sizes and iteration
    counts taken from it are capped to keep the gdb process itself usable.
    """
    ceiling = 1000
    if ceiling < val:
        return ceiling
    return val
115
116
def safe_range(val):
    """Like xrange(val), but with the count capped by safety_limit().

    The value is converted with int() first; the cap guards against a
    corrupted count read from the debugged process.
    """
    count = int(val)
    capped = safety_limit(count)
    return xrange(capped)
121
if sys.version_info[0] < 3:
    def write_unicode(file, text):
        """Write a byte or unicode string to "file" (Python 2 flavour).

        Unicode text is first encoded to ENCODING using the
        'backslashreplace' error handler so that no UnicodeEncodeError can
        be raised; byte strings are written untouched.
        """
        if isinstance(text, unicode):
            file.write(text.encode(ENCODING, 'backslashreplace'))
        else:
            file.write(text)
else:
    def write_unicode(file, text):
        """Write "text" to "file" unchanged (Python 3 flavour)."""
        file.write(text)
133
if hasattr(os, 'fsencode'):
    os_fsencode = os.fsencode
else:
    def os_fsencode(filename):
        """Fallback for os.fsencode on interpreters that lack it.

        Non-unicode input is returned unchanged.  Unicode input is encoded
        with the filesystem encoding; except for 'mbcs', lone surrogates in
        the range U+DC80..U+DCFF are mapped back to the raw byte they stand
        for, emulating the surrogateescape error handler.
        """
        if not isinstance(filename, unicode):
            return filename
        fs_encoding = sys.getfilesystemencoding()
        if fs_encoding == 'mbcs':
            # mbcs doesn't support surrogateescape
            return filename.encode(fs_encoding)

        def _encode_char(char):
            # surrogateescape error handler
            code_point = ord(char)
            if 0xDC80 <= code_point <= 0xDCFF:
                return chr(code_point - 0xDC00)
            return char.encode(fs_encoding)

        return ''.join([_encode_char(char) for char in filename])
153
class StringTruncated(RuntimeError):
    """Raised by TruncatedStringIO.write() once the length limit is hit."""
156
class TruncatedStringIO(object):
    '''Minimal write-only string buffer, similar to io.StringIO, that can cap
    its length.

    When "maxlen" is set (and non-zero), a write that would take the buffer
    past that length stores only what still fits and then raises
    StringTruncated.
    '''
    def __init__(self, maxlen=None):
        self.maxlen = maxlen
        self._val = ''

    def write(self, data):
        '''Append "data", raising StringTruncated if it does not fit.'''
        limit = self.maxlen
        if limit and len(data) + len(self._val) > limit:
            # Truncation: keep only the part that still fits
            room = limit - len(self._val)
            self._val += data[0:room]
            raise StringTruncated()

        self._val += data

    def getvalue(self):
        '''Return everything written so far.'''
        return self._val
175
class PyObjectPtr(object):
    """
    Class wrapping a gdb.Value that's either a (PyObject*) within the
    inferior process, or some subclass pointer e.g. (PyBytesObject*)

    There will be a subclass for every refined PyObject type that we care
    about.

    Note that at every stage the underlying pointer could be NULL, point
    to corrupt data, etc; this is the debugger, after all.
    """
    # Name of the C struct this wrapper points at; get_gdb_type() turns it
    # into the matching gdb pointer type.  Subclasses override it.
    _typename = 'PyObject'

    def __init__(self, gdbval, cast_to=None):
        '''
        Wrap "gdbval", first casting it to the gdb type "cast_to" when one is
        given.
        '''
        if cast_to:
            self._gdbval = gdbval.cast(cast_to)
        else:
            self._gdbval = gdbval

    def field(self, name):
        '''
        Get the gdb.Value for the given field within the PyObject, coping with
        some python 2 versus python 3 differences.

        Various libpython types are defined using the "PyObject_HEAD" and
        "PyObject_VAR_HEAD" macros.

        In Python 2, these are defined so that "ob_type" and (for a var
        object) "ob_size" are fields of the type in question.

        In Python 3, this is defined as an embedded PyVarObject type thus:
           PyVarObject ob_base;
        so that the "ob_size" field is located inside the "ob_base" field, and
        the "ob_type" is most easily accessed by casting back to a (PyObject*).

        Raises NullPyObjectPtr if the wrapped pointer is NULL.
        '''
        if self.is_null():
            raise NullPyObjectPtr(self)

        # ob_type is read through a (PyObject*) cast so it works for any type
        if name == 'ob_type':
            pyo_ptr = self._gdbval.cast(PyObjectPtr.get_gdb_type())
            return pyo_ptr.dereference()[name]

        # ob_size is read through a (PyVarObject*) cast for the same reason
        if name == 'ob_size':
            pyo_ptr = self._gdbval.cast(PyVarObjectPtr.get_gdb_type())
            return pyo_ptr.dereference()[name]

        # General case: look it up inside the object:
        return self._gdbval.dereference()[name]

    def pyop_field(self, name):
        '''
        Get a PyObjectPtr for the given PyObject* field within this PyObject,
        coping with some python 2 versus python 3 differences.
        '''
        return PyObjectPtr.from_pyobject_ptr(self.field(name))

    def write_field_repr(self, name, out, visited):
        '''
        Extract the PyObject* field named "name", and write its representation
        to file-like object "out"
        '''
        field_obj = self.pyop_field(name)
        field_obj.write_repr(out, visited)

    def get_truncated_repr(self, maxlen):
        '''
        Get a repr-like string for the data, but truncate it at "maxlen" bytes
        (ending the object graph traversal as soon as you do)

        When truncation happens, '...(truncated)' is appended to the text
        collected so far.
        '''
        out = TruncatedStringIO(maxlen)
        try:
            self.write_repr(out, set())
        except StringTruncated:
            # Truncation occurred:
            return out.getvalue() + '...(truncated)'

        # No truncation occurred:
        return out.getvalue()

    def type(self):
        '''Return a PyTypeObjectPtr wrapping this object's ob_type field.'''
        return PyTypeObjectPtr(self.field('ob_type'))

    def is_null(self):
        '''Return True if the wrapped pointer converts to the integer 0.'''
        return 0 == long(self._gdbval)

    def is_optimized_out(self):
        '''
        Is the value of the underlying PyObject* visible to the debugger?

        This can vary with the precise version of the compiler used to build
        Python, and the precise version of gdb.

        See e.g. https://bugzilla.redhat.com/show_bug.cgi?id=556975 with
        PyEval_EvalFrameEx's "f"
        '''
        return self._gdbval.is_optimized_out

    def safe_tp_name(self):
        '''
        Return the tp_name of this object's type as a string, or 'unknown'
        if it cannot be read or decoded.
        '''
        try:
            ob_type = self.type()
            tp_name = ob_type.field('tp_name')
            return tp_name.string()
        # NullPyObjectPtr: NULL tp_name?
        # RuntimeError: Can't even read the object at all?
        # UnicodeDecodeError: Failed to decode tp_name bytestring
        except (NullPyObjectPtr, RuntimeError, UnicodeDecodeError):
            return 'unknown'

    def proxyval(self, visited):
        '''
        Scrape a value from the inferior process, and try to represent it
        within the gdb process, whilst (hopefully) avoiding crashes when
        the remote data is corrupt.

        Derived classes will override this.

        For example, a PyIntObject* with ob_ival 42 in the inferior process
        should result in an int(42) in this process.

        visited: a set of all gdb.Value pyobject pointers already visited
        whilst generating this value (to guard against infinite recursion when
        visiting object graphs with loops).  Analogous to Py_ReprEnter and
        Py_ReprLeave
        '''

        class FakeRepr(object):
            """
            Class representing a non-descript PyObject* value in the inferior
            process for when we don't have a custom scraper, intended to have
            a sane repr().
            """

            def __init__(self, tp_name, address):
                self.tp_name = tp_name
                self.address = address

            def __repr__(self):
                # For the NULL pointer, we have no way of knowing a type, so
                # special-case it as per
                # http://bugs.python.org/issue8032#msg100882
                if self.address == 0:
                    return '0x0'
                return '<%s at remote 0x%x>' % (self.tp_name, self.address)

        return FakeRepr(self.safe_tp_name(),
                        long(self._gdbval))

    def write_repr(self, out, visited):
        '''
        Write a string representation of the value scraped from the inferior
        process to "out", a file-like object.
        '''
        # Default implementation: generate a proxy value and write its repr
        # However, this could involve a lot of work for complicated objects,
        # so for derived classes we specialize this
        return out.write(repr(self.proxyval(visited)))

    @classmethod
    def subclass_from_type(cls, t):
        '''
        Given a PyTypeObjectPtr instance wrapping a gdb.Value that's a
        (PyTypeObject*), determine the corresponding subclass of PyObjectPtr
        to use

        Ideally, we would look up the symbols for the global types, but that
        isn't working yet:
          (gdb) python print gdb.lookup_symbol('PyList_Type')[0].value
          Traceback (most recent call last):
            File "<string>", line 1, in <module>
          NotImplementedError: Symbol type not yet supported in Python scripts.
          Error while executing Python code.

        For now, we use tp_flags, after doing some string comparisons on the
        tp_name for some special-cases that don't seem to be visible through
        flags
        '''
        try:
            tp_name = t.field('tp_name').string()
            tp_flags = int(t.field('tp_flags'))
        # RuntimeError: NULL pointers
        # UnicodeDecodeError: string() fails to decode the bytestring
        except (RuntimeError, UnicodeDecodeError):
            # Handle any kind of error e.g. NULL ptrs by simply using the base
            # class
            return cls

        #print('tp_flags = 0x%08x' % tp_flags)
        #print('tp_name = %r' % tp_name)

        # Types recognised purely by name; checked before any flag test
        name_map = {'bool': PyBoolObjectPtr,
                    'classobj': PyClassObjectPtr,
                    'NoneType': PyNoneStructPtr,
                    'frame': PyFrameObjectPtr,
                    'set' : PySetObjectPtr,
                    'frozenset' : PySetObjectPtr,
                    'builtin_function_or_method' : PyCFunctionObjectPtr,
                    'method-wrapper': wrapperobject,
                    }
        if tp_name in name_map:
            return name_map[tp_name]

        # The heap-type test comes before the *_SUBCLASS tests below, so a
        # type with both kinds of flag set is reported as HeapTypeObjectPtr
        if tp_flags & Py_TPFLAGS_HEAPTYPE:
            return HeapTypeObjectPtr

        if tp_flags & Py_TPFLAGS_LONG_SUBCLASS:
            return PyLongObjectPtr
        if tp_flags & Py_TPFLAGS_LIST_SUBCLASS:
            return PyListObjectPtr
        if tp_flags & Py_TPFLAGS_TUPLE_SUBCLASS:
            return PyTupleObjectPtr
        if tp_flags & Py_TPFLAGS_BYTES_SUBCLASS:
            return PyBytesObjectPtr
        if tp_flags & Py_TPFLAGS_UNICODE_SUBCLASS:
            return PyUnicodeObjectPtr
        if tp_flags & Py_TPFLAGS_DICT_SUBCLASS:
            return PyDictObjectPtr
        if tp_flags & Py_TPFLAGS_BASE_EXC_SUBCLASS:
            return PyBaseExceptionObjectPtr
        #if tp_flags & Py_TPFLAGS_TYPE_SUBCLASS:
        #    return PyTypeObjectPtr

        # Use the base class:
        return cls

    @classmethod
    def from_pyobject_ptr(cls, gdbval):
        '''
        Try to locate the appropriate derived class dynamically, and cast
        the pointer accordingly.
        '''
        try:
            p = PyObjectPtr(gdbval)
            cls = cls.subclass_from_type(p.type())
            return cls(gdbval, cast_to=cls.get_gdb_type())
        except RuntimeError:
            # Handle any kind of error e.g. NULL ptrs by simply using the base
            # class
            pass
        # NOTE(review): "cls" may already have been rebound to the refined
        # subclass above; if the failure happened after that point, the
        # uncast value is wrapped in that subclass rather than in the class
        # this method was called on - confirm that this is intended.
        return cls(gdbval)

    @classmethod
    def get_gdb_type(cls):
        '''Return the gdb pointer type for the struct named by _typename.'''
        return gdb.lookup_type(cls._typename).pointer()

    def as_address(self):
        '''Return the wrapped pointer converted to an integer.'''
        return long(self._gdbval)
422
class PyVarObjectPtr(PyObjectPtr):
    """
    PyObjectPtr flavour whose gdb type is (PyVarObject*).
    """
    _typename = 'PyVarObject'
425
class ProxyAlreadyVisited(object):
    '''
    Stand-in proxy returned in place of an object that has already been
    visited, so that loops in the object graph do not recurse forever.

    Its repr() is exactly the placeholder text it was built with, in the
    manner of what users of Py_ReprEnter and Py_ReprLeave emit.
    '''
    def __init__(self, rep):
        self._rep = rep

    def __repr__(self):
        placeholder = self._rep
        return placeholder
438
439
def _write_instance_repr(out, visited, name, pyop_attrdict, address):
    '''Write "<name(attr=value, ...) at remote 0xADDR>" to the file-like
    object "out".

    The parenthesised attribute list is only written when "pyop_attrdict" is
    a PyDictObjectPtr; otherwise the output is "<name at remote 0xADDR>".
    "visited" is passed through to the key/value writers.
    '''
    out.write('<')
    out.write(name)

    # Write dictionary of instance attributes:
    if isinstance(pyop_attrdict, PyDictObjectPtr):
        out.write('(')
        pairs = pyop_attrdict.iteritems()
        for position, (pyop_arg, pyop_val) in enumerate(pairs):
            if position:
                # Separator goes before every pair except the first
                out.write(', ')
            out.write(pyop_arg.proxyval(visited))
            out.write('=')
            pyop_val.write_repr(out, visited)
        out.write(')')
    out.write(' at remote 0x%x>' % address)
459
460
class InstanceProxy(object):
    '''
    Proxy for an instance in the inferior process: its class name, its
    attribute dictionary (already converted to proxies) and its address.
    '''

    def __init__(self, cl_name, attrdict, address):
        self.cl_name = cl_name
        self.attrdict = attrdict
        self.address = address

    def __repr__(self):
        # Without a real dict there are no attributes to show
        if not isinstance(self.attrdict, dict):
            return '<%s at remote 0x%x>' % (self.cl_name,
                                            self.address)

        pairs = []
        for arg, val in self.attrdict.items():
            pairs.append("%s=%r" % (arg, val))
        kwargs = ', '.join(pairs)
        return '<%s(%s) at remote 0x%x>' % (self.cl_name,
                                            kwargs, self.address)
477
def _PyObject_VAR_SIZE(typeobj, nitems):
    '''Compute tp_basicsize + nitems * tp_itemsize for "typeobj", rounded up
    to a multiple of the pointer size, and cast the result to size_t.

    The gdb 'size_t' type is looked up on first use and cached on the
    function object.
    '''
    size_t_type = _PyObject_VAR_SIZE._type_size_t
    if size_t_type is None:
        size_t_type = gdb.lookup_type('size_t')
        _PyObject_VAR_SIZE._type_size_t = size_t_type

    raw_size = (typeobj.field('tp_basicsize') +
                nitems * typeobj.field('tp_itemsize'))
    # Adding (pointer size - 1) and masking rounds up to a pointer multiple
    padded = raw_size + (_sizeof_void_p() - 1)
    aligned = padded & ~(_sizeof_void_p() - 1)
    return aligned.cast(size_t_type)
_PyObject_VAR_SIZE._type_size_t = None
488
class HeapTypeObjectPtr(PyObjectPtr):
    """
    Wrapper chosen by PyObjectPtr.subclass_from_type() for objects whose type
    has Py_TPFLAGS_HEAPTYPE set; shown as the type name plus the contents of
    the instance's attribute dictionary.
    """
    _typename = 'PyObject'

    def get_attr_dict(self):
        '''
        Get the PyDictObject ptr representing the attribute dictionary
        (or None if there's a problem)

        The dictionary pointer is located via the type's tp_dictoffset.  A
        negative offset is relative to the end of the (variable-sized)
        object, so it is first converted to an offset from the start.
        '''
        try:
            typeobj = self.type()
            dictoffset = int_from_int(typeobj.field('tp_dictoffset'))
            if dictoffset != 0:
                if dictoffset < 0:
                    type_PyVarObject_ptr = gdb.lookup_type('PyVarObject').pointer()
                    tsize = int_from_int(self._gdbval.cast(type_PyVarObject_ptr)['ob_size'])
                    if tsize < 0:
                        tsize = -tsize
                    size = _PyObject_VAR_SIZE(typeobj, tsize)
                    dictoffset += size
                    # The values come from the inferior and may be corrupt.
                    # An assert here would raise AssertionError, which the
                    # handler below does not catch (and would be skipped
                    # entirely under -O), so fail safe explicitly instead.
                    if not (dictoffset > 0 and
                            dictoffset % _sizeof_void_p() == 0):
                        return None

                dictptr = self._gdbval.cast(_type_char_ptr()) + dictoffset
                PyObjectPtrPtr = PyObjectPtr.get_gdb_type().pointer()
                dictptr = dictptr.cast(PyObjectPtrPtr)
                return PyObjectPtr.from_pyobject_ptr(dictptr.dereference())
        except RuntimeError:
            # Corrupt data somewhere; fail safe
            pass

        # Not found, or some kind of error:
        return None

    def proxyval(self, visited):
        '''
        Support for classes.

        Currently we just locate the dictionary using a transliteration to
        python of _PyObject_GetDictPtr, ignoring descriptors

        Returns an InstanceProxy, or a ProxyAlreadyVisited('<...>') if this
        object's address is already in "visited".
        '''
        # Guard against infinite loops:
        if self.as_address() in visited:
            return ProxyAlreadyVisited('<...>')
        visited.add(self.as_address())

        pyop_attr_dict = self.get_attr_dict()
        if pyop_attr_dict:
            attr_dict = pyop_attr_dict.proxyval(visited)
        else:
            attr_dict = {}
        tp_name = self.safe_tp_name()

        # Class:
        return InstanceProxy(tp_name, attr_dict, long(self._gdbval))

    def write_repr(self, out, visited):
        '''
        Write "<typename(attr=value, ...) at remote 0xADDR>" to "out", or
        '<...>' if this object's address is already in "visited".
        '''
        # Guard against infinite loops:
        if self.as_address() in visited:
            out.write('<...>')
            return
        visited.add(self.as_address())

        pyop_attrdict = self.get_attr_dict()
        _write_instance_repr(out, visited,
                             self.safe_tp_name(), pyop_attrdict, self.as_address())
554
class ProxyException(Exception):
    '''
    Proxy for an exception instance in the inferior process; its repr() is
    the type name immediately followed by the repr of its args.
    '''
    def __init__(self, tp_name, args):
        self.tp_name = tp_name
        self.args = args

    def __repr__(self):
        name, arguments = self.tp_name, self.args
        return '%s%r' % (name, arguments)
562
class PyBaseExceptionObjectPtr(PyObjectPtr):
    """
    Wrapper around a gdb.Value that is a PyBaseExceptionObject*, i.e. an
    exception instance living in the process being debugged.
    """
    _typename = 'PyBaseExceptionObject'

    def proxyval(self, visited):
        '''
        Return a ProxyException built from the type name and the proxied
        "args" field, or ProxyAlreadyVisited('(...)') on a revisit.
        '''
        address = self.as_address()
        # Guard against infinite loops:
        if address in visited:
            return ProxyAlreadyVisited('(...)')
        visited.add(address)

        arg_proxy = self.pyop_field('args').proxyval(visited)
        tp_name = self.safe_tp_name()
        return ProxyException(tp_name, arg_proxy)

    def write_repr(self, out, visited):
        '''
        Write the type name followed by the repr of the "args" field to
        "out", or '(...)' on a revisit.
        '''
        address = self.as_address()
        # Guard against infinite loops:
        if address in visited:
            out.write('(...)')
            return
        visited.add(address)

        out.write(self.safe_tp_name())
        self.write_field_repr('args', out, visited)
588
class PyClassObjectPtr(PyObjectPtr):
    """
    Wrapper around a gdb.Value that is a PyClassObject*, i.e. a <classobj>
    instance living in the process being debugged.
    """
    _typename = 'PyClassObject'
595
596
class BuiltInFunctionProxy(object):
    '''Proxy for a built-in function, identified only by its ml_name.'''

    def __init__(self, ml_name):
        self.ml_name = ml_name

    def __repr__(self):
        function_name = self.ml_name
        return "<built-in function %s>" % function_name
603
class BuiltInMethodProxy(object):
    '''
    Proxy for a built-in method: its ml_name plus the wrapper for the object
    it is bound to (which supplies the type name and address for repr()).
    '''

    def __init__(self, ml_name, pyop_m_self):
        self.ml_name = ml_name
        self.pyop_m_self = pyop_m_self

    def __repr__(self):
        bound = self.pyop_m_self
        details = (self.ml_name,
                   bound.safe_tp_name(),
                   bound.as_address())
        return '<built-in method %s of %s object at remote 0x%x>' % details
615
class PyCFunctionObjectPtr(PyObjectPtr):
    """
    Wrapper around a gdb.Value that is a PyCFunctionObject*
    (see Include/methodobject.h and Objects/methodobject.c)
    """
    _typename = 'PyCFunctionObject'

    def proxyval(self, visited):
        '''
        Return a BuiltInFunctionProxy when m_self is NULL, otherwise a
        BuiltInMethodProxy bound to m_self.
        '''
        method_def = self.field('m_ml') # m_ml is a (PyMethodDef*)
        try:
            ml_name = method_def['ml_name'].string()
        except UnicodeDecodeError:
            # Name could not be decoded: use a recognisable stand-in
            ml_name = '<ml_name:UnicodeDecodeError>'

        pyop_m_self = self.pyop_field('m_self')
        if not pyop_m_self.is_null():
            return BuiltInMethodProxy(ml_name, pyop_m_self)
        return BuiltInFunctionProxy(ml_name)
635
636
class PyCodeObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyCodeObject* i.e. a <code> instance
    within the process being debugged.
    """
    _typename = 'PyCodeObject'

    def addr2line(self, addrq):
        '''
        Get the line number for a given bytecode offset

        Analogous to PyCode_Addr2Line; translated from pseudocode in
        Objects/lnotab_notes.txt

        addrq: the bytecode offset to look up; a negative value yields
        co_firstlineno.
        '''
        co_linetable = self.pyop_field('co_linetable').proxyval(set())

        # Initialize lineno to co_firstlineno as per PyCode_Addr2Line
        # not 0, as lnotab_notes.txt has it:
        lineno = int_from_int(self.field('co_firstlineno'))

        if addrq < 0:
            return lineno
        addr = 0
        # The table is walked as (address increment, line increment) pairs
        for addr_incr, line_incr in zip(co_linetable[::2], co_linetable[1::2]):
            # NOTE(review): addr_incr is passed to ord() just below, so it is
            # presumably a one-character string; if so, this comparison with
            # the integer 255 can never be true - confirm the intent.
            if addr_incr == 255:
                break
            addr += ord(addr_incr)
            # The line increment is a signed byte: 128 is treated as "no
            # change" and larger values wrap around to negative deltas
            line_delta = ord(line_incr)
            if line_delta == 128:
                line_delta = 0
            elif line_delta > 128:
                line_delta -= 256
            lineno += line_delta
            if addr > addrq:
                return lineno
        # Reached when no pair carries addr past addrq (or after the break)
        assert False, "Unreachable"
673
674
class PyDictObjectPtr(PyObjectPtr):
    """
    Wrapper around a gdb.Value that is a PyDictObject*, i.e. a dict instance
    living in the process being debugged.
    """
    _typename = 'PyDictObject'

    def iteritems(self):
        '''
        Generate (PyObjectPtr key, PyObjectPtr value) pairs, in the manner of
        dict.iteritems().  Entries whose value pointer is NULL are skipped.
        '''
        keys = self.field('ma_keys')
        values = self.field('ma_values')
        entries, nentries = self._get_entries(keys)
        for slot in safe_range(nentries):
            entry = entries[slot]
            # With a non-NULL ma_values the value comes from that array,
            # otherwise from the entry itself
            if long(values):
                raw_value = values[slot]
            else:
                raw_value = entry['me_value']
            pyop_value = PyObjectPtr.from_pyobject_ptr(raw_value)
            if pyop_value.is_null():
                continue
            pyop_key = PyObjectPtr.from_pyobject_ptr(entry['me_key'])
            yield (pyop_key, pyop_value)

    def proxyval(self, visited):
        '''
        Return a dict of proxied keys to proxied values, or
        ProxyAlreadyVisited('{...}') on a revisit.
        '''
        address = self.as_address()
        # Guard against infinite loops:
        if address in visited:
            return ProxyAlreadyVisited('{...}')
        visited.add(address)

        proxies = {}
        for pyop_key, pyop_value in self.iteritems():
            # Key first, then value: both calls may add to "visited"
            key_proxy = pyop_key.proxyval(visited)
            value_proxy = pyop_value.proxyval(visited)
            proxies[key_proxy] = value_proxy
        return proxies

    def write_repr(self, out, visited):
        '''
        Write a "{key: value, ...}" representation to "out", or '{...}' on a
        revisit.
        '''
        address = self.as_address()
        # Guard against infinite loops:
        if address in visited:
            out.write('{...}')
            return
        visited.add(address)

        out.write('{')
        for position, (pyop_key, pyop_value) in enumerate(self.iteritems()):
            if position:
                out.write(', ')
            pyop_key.write_repr(out, visited)
            out.write(': ')
            pyop_value.write_repr(out, visited)
        out.write('}')

    def _get_entries(self, keys):
        '''
        Return an (entries, count) pair for the given keys object, coping
        with the two layouts: a direct "dk_entries" field (<= Python 3.5), or
        entries stored after the "dk_indices" table (>= Python 3.6).
        '''
        dk_nentries = int(keys['dk_nentries'])
        dk_size = int(keys['dk_size'])
        try:
            # <= Python 3.5
            return keys['dk_entries'], dk_size
        except RuntimeError:
            # >= Python 3.6
            pass

        # Width in bytes of one index slot, chosen from the table size
        index_width = 8
        for limit, width in ((0xFF, 1), (0xFFFF, 2), (0xFFFFFFFF, 4)):
            if dk_size <= limit:
                index_width = width
                break
        offset = index_width * dk_size

        # The entries start "offset" bytes past the start of dk_indices
        indices_start = keys['dk_indices'].address
        entries_start = indices_start.cast(_type_unsigned_char_ptr()) + offset
        ent_ptr_t = gdb.lookup_type('PyDictKeyEntry').pointer()
        ent_addr = entries_start.cast(ent_ptr_t)

        return ent_addr, dk_nentries
756
757
class PyListObjectPtr(PyObjectPtr):
    """
    Wrapper around a gdb.Value that is a PyListObject*.
    """
    _typename = 'PyListObject'

    def __getitem__(self, i):
        # Get the gdb.Value for the (PyObject*) with the given index:
        items = self.field('ob_item')
        return items[i]

    def proxyval(self, visited):
        '''
        Return a list of proxies for the elements, or
        ProxyAlreadyVisited('[...]') on a revisit.
        '''
        address = self.as_address()
        # Guard against infinite loops:
        if address in visited:
            return ProxyAlreadyVisited('[...]')
        visited.add(address)

        length = int_from_int(self.field('ob_size'))
        result = []
        for index in safe_range(length):
            element = PyObjectPtr.from_pyobject_ptr(self[index])
            result.append(element.proxyval(visited))
        return result

    def write_repr(self, out, visited):
        '''
        Write a "[a, b, ...]" representation to "out", or '[...]' on a
        revisit.
        '''
        address = self.as_address()
        # Guard against infinite loops:
        if address in visited:
            out.write('[...]')
            return
        visited.add(address)

        out.write('[')
        length = int_from_int(self.field('ob_size'))
        for index in safe_range(length):
            if index > 0:
                out.write(', ')
            PyObjectPtr.from_pyobject_ptr(self[index]).write_repr(out, visited)
        out.write(']')
790
class PyLongObjectPtr(PyObjectPtr):
    """
    Wrapper around a gdb.Value that is a PyLongObject*.
    """
    _typename = 'PyLongObject'

    def proxyval(self, visited):
        '''
        Rebuild the integer value from its digit array.

        Python's Include/longobjrep.h has this declaration:
           struct _longobject {
               PyObject_VAR_HEAD
               digit ob_digit[1];
           };

        with this description:
            The absolute value of a number is equal to
                 SUM(for i=0 through abs(ob_size)-1) ob_digit[i] * 2**(SHIFT*i)
            Negative numbers are represented with ob_size < 0;
            zero is represented by ob_size == 0.

        where SHIFT can be either:
            #define PyLong_SHIFT        30
            #define PyLong_SHIFT        15
        '''
        ob_size = long(self.field('ob_size'))
        if ob_size == 0:
            return 0

        ob_digit = self.field('ob_digit')

        # A 2-byte digit type means 15-bit digits, otherwise 30-bit digits
        SHIFT = 15 if gdb.lookup_type('digit').sizeof == 2 else 30

        magnitude = 0
        for i in safe_range(abs(ob_size)):
            magnitude += long(ob_digit[i]) * 2**(SHIFT*i)

        if ob_size < 0:
            return -magnitude
        return magnitude

    def write_repr(self, out, visited):
        # Write this out as a Python 3 int literal, i.e. without the "L" suffix
        value = self.proxyval(visited)
        out.write("%s" % value)
834
835
class PyBoolObjectPtr(PyLongObjectPtr):
    """
    Wrapper around a gdb.Value that is a PyBoolObject*, i.e. one of the two
    <bool> instances (Py_True/Py_False) in the process being debugged.
    """
    def proxyval(self, visited):
        # Any non-zero integer value maps to True, zero to False
        return bool(PyLongObjectPtr.proxyval(self, visited))
846
class PyNoneStructPtr(PyObjectPtr):
    """
    Wrapper around a gdb.Value that is a PyObject* pointing to the
    singleton (we hope) _Py_NoneStruct with ob_type PyNone_Type
    """
    _typename = 'PyObject'

    def proxyval(self, visited):
        # The proxy for the inferior's None is simply None in this process
        return None
856
857
class PyFrameObjectPtr(PyObjectPtr):
    _typename = 'PyFrameObject'

    def __init__(self, gdbval, cast_to=None):
        PyObjectPtr.__init__(self, gdbval, cast_to)

        # The cached code-object attributes below are only populated when
        # the frame is not reported as optimized out.
        if self.is_optimized_out():
            return

        self.co = PyCodeObjectPtr.from_pyobject_ptr(self.field('f_code'))
        code = self.co
        self.co_name = code.pyop_field('co_name')
        self.co_filename = code.pyop_field('co_filename')

        self.f_lineno = int_from_int(self.field('f_lineno'))
        self.f_lasti = int_from_int(self.field('f_lasti'))
        self.co_nlocals = int_from_int(code.field('co_nlocals'))
        self.co_varnames = PyTupleObjectPtr.from_pyobject_ptr(code.field('co_varnames'))

    def iter_locals(self):
        '''
        Generate (name, value) pairs of PyObjectPtr instances, one for each
        local variable of this frame whose slot is non-NULL
        '''
        if self.is_optimized_out():
            return

        slots = self.field('f_localsplus')
        for index in safe_range(self.co_nlocals):
            value = PyObjectPtr.from_pyobject_ptr(slots[index])
            if value.is_null():
                # Slot not populated: nothing to report for this name
                continue
            name = PyObjectPtr.from_pyobject_ptr(self.co_varnames[index])
            yield (name, value)

    def iter_globals(self):
        '''
        Return an iterable of (name, value) pairs of PyObjectPtr instances
        for the global variables of this frame (empty if optimized out)
        '''
        if self.is_optimized_out():
            return ()

        return self.pyop_field('f_globals').iteritems()

    def iter_builtins(self):
        '''
        Return an iterable of (name, value) pairs of PyObjectPtr instances
        for the builtin variables (empty if optimized out)
        '''
        if self.is_optimized_out():
            return ()

        return self.pyop_field('f_builtins').iteritems()

    def get_var_by_name(self, name):
        '''
        Search for the named variable, returning a (PyObjectPtr, scope) pair
        where scope is one of the strings 'local', 'global', 'builtin'

        Scopes are searched in that order; (None, None) means "not found"
        '''
        # Each scope's iterator is only created once the previous scope has
        # been exhausted without a match.
        scopes = ((self.iter_locals, 'local'),
                  (self.iter_globals, 'global'),
                  (self.iter_builtins, 'builtin'))
        for iter_scope, scope in scopes:
            for pyop_name, pyop_value in iter_scope():
                if name == pyop_name.proxyval(set()):
                    return pyop_value, scope
        return None, None

    def filename(self):
        '''Return the path of the current Python source file, as a string'''
        if self.is_optimized_out():
            return FRAME_INFO_OPTIMIZED_OUT
        return self.co_filename.proxyval(set())

    def current_line_num(self):
        '''Return the current line number as an integer (1-based), or None

        Translated from PyFrame_GetLineNumber and PyCode_Addr2Line

        See Objects/lnotab_notes.txt
        '''
        if self.is_optimized_out():
            return None

        if long(self.field('f_trace')) != 0:
            # A non-NULL f_trace: use the cached f_lineno
            return self.f_lineno

        try:
            return self.co.addr2line(self.f_lasti*2)
        except Exception:
            # bpo-34989: addr2line() is complex and can fail in many ways,
            # e.g. with a TypeError on "FakeRepr" when gdb cannot load debug
            # symbols. The catch-all keeps this function safe; callers have
            # to cope with None anyway for optimized Python.
            return None

    def current_line(self):
        '''Return the text of the current source line as a string, including
        its trailing newline character'''
        if self.is_optimized_out():
            return FRAME_INFO_OPTIMIZED_OUT

        lineno = self.current_line_num()
        if lineno is None:
            return '(failed to get frame line number)'

        path = self.filename()
        try:
            with open(os_fsencode(path), 'r') as source:
                source_lines = source.readlines()
        except IOError:
            return None

        # current_line_num is 1-based, the list offset is 0-based
        offset = lineno - 1
        try:
            return source_lines[offset]
        except IndexError:
            return None

    def write_repr(self, out, visited):
        if self.is_optimized_out():
            out.write(FRAME_INFO_OPTIMIZED_OUT)
            return

        lineno = self.current_line_num()
        if lineno is None:
            lineno = "?"
        else:
            lineno = str(lineno)

        header = (self.as_address(),
                  self.co_filename.proxyval(visited),
                  lineno,
                  self.co_name.proxyval(visited))
        out.write('Frame 0x%x, for file %s, line %s, in %s (' % header)

        # Comma-separated "name=value" pairs for every non-NULL local
        for position, (pyop_name, pyop_value) in enumerate(self.iter_locals()):
            if position:
                out.write(', ')

            out.write(pyop_name.proxyval(visited))
            out.write('=')
            pyop_value.write_repr(out, visited)

        out.write(')')

    def print_traceback(self):
        if self.is_optimized_out():
            sys.stdout.write('  %s\n' % FRAME_INFO_OPTIMIZED_OUT)
            return

        visited = set()
        lineno = self.current_line_num()
        if lineno is None:
            lineno = "?"
        else:
            lineno = str(lineno)

        location = (self.co_filename.proxyval(visited),
                    lineno,
                    self.co_name.proxyval(visited))
        sys.stdout.write('  File "%s", line %s, in %s\n' % location)
1016
class PySetObjectPtr(PyObjectPtr):
    _typename = 'PySetObject'

    @classmethod
    def _dummy_key(cls):
        # Value of the inferior's global symbol _PySet_Dummy; __iter__
        # skips table entries whose key equals it.
        return gdb.lookup_global_symbol('_PySet_Dummy').value()

    def __iter__(self):
        # Walk the table slots 0..mask, yielding a PyObjectPtr for every
        # key that is neither NULL nor the dummy key.
        dummy_ptr = self._dummy_key()
        table = self.field('table')
        slot_count = self.field('mask') + 1
        for slot in safe_range(slot_count):
            key = table[slot]['key']
            is_live = key != 0 and key != dummy_ptr
            if is_live:
                yield PyObjectPtr.from_pyobject_ptr(key)

    def proxyval(self, visited):
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            return ProxyAlreadyVisited('%s(...)' % self.safe_tp_name())
        visited.add(address)

        # Choose the container type first, then fill it lazily from the
        # proxies of the members.
        if self.safe_tp_name() == 'frozenset':
            container = frozenset
        else:
            container = set
        return container(key.proxyval(visited) for key in self)

    def write_repr(self, out, visited):
        # Emulate Python 3's set_repr
        tp_name = self.safe_tp_name()

        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            out.write('(...)')
            return
        visited.add(address)

        # Python 3's set_repr special-cases the empty set:
        if not self.field('used'):
            out.write(tp_name)
            out.write('()')
            return

        # Python 3 uses {} for set literals; any other type name wraps the
        # braces in "name(...)":
        needs_wrapper = tp_name != 'set'
        if needs_wrapper:
            out.write(tp_name)
            out.write('(')

        out.write('{')
        for position, key in enumerate(self):
            if position:
                out.write(', ')
            key.write_repr(out, visited)
        out.write('}')

        if needs_wrapper:
            out.write(')')
1077
1078
class PyBytesObjectPtr(PyObjectPtr):
    _typename = 'PyBytesObject'

    def __str__(self):
        # Read ob_size elements starting at ob_sval, viewed as unsigned
        # chars, and map each one through chr().
        length = self.field('ob_size')
        sval = self.field('ob_sval')
        data = sval.address.cast(_type_unsigned_char_ptr())
        chars = []
        for index in safe_range(length):
            chars.append(chr(data[index]))
        return ''.join(chars)

    def proxyval(self, visited):
        return str(self)

    def write_repr(self, out, visited):
        # Write this out as a Python 3 bytes literal, i.e. with a "b" prefix

        # Get a PyStringObject* within the Python 2 gdb process:
        proxy = self.proxyval(visited)

        # Transliteration of Python 3's Objects/bytesobject.c:PyBytes_Repr
        # to Python 2 code. Double quotes are only chosen when the payload
        # contains a single quote and no double quote.
        if "'" in proxy and '"' not in proxy:
            quote = '"'
        else:
            quote = "'"

        simple_escapes = {'\t': '\\t', '\n': '\\n', '\r': '\\r'}

        out.write('b')
        out.write(quote)
        for byte in proxy:
            if byte in (quote, '\\'):
                out.write('\\')
                out.write(byte)
            elif byte in simple_escapes:
                out.write(simple_escapes[byte])
            elif byte < ' ' or ord(byte) >= 0x7f:
                # Everything else outside printable ASCII becomes \xhh
                code = ord(byte)
                out.write('\\x')
                out.write(hexdigits[(code & 0xf0) >> 4])
                out.write(hexdigits[code & 0xf])
            else:
                out.write(byte)
        out.write(quote)
1121
class PyTupleObjectPtr(PyObjectPtr):
    _typename = 'PyTupleObject'

    def __getitem__(self, i):
        # Return the gdb.Value for the (PyObject*) stored at index i:
        return self.field('ob_item')[i]

    def proxyval(self, visited):
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            return ProxyAlreadyVisited('(...)')
        visited.add(address)

        # Build the proxy of each element in order, then freeze the result
        length = int_from_int(self.field('ob_size'))
        items = []
        for index in safe_range(length):
            element = PyObjectPtr.from_pyobject_ptr(self[index])
            items.append(element.proxyval(visited))
        return tuple(items)

    def write_repr(self, out, visited):
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            out.write('(...)')
            return
        visited.add(address)

        out.write('(')
        length = int_from_int(self.field('ob_size'))
        for index in safe_range(length):
            if index > 0:
                out.write(', ')
            PyObjectPtr.from_pyobject_ptr(self[index]).write_repr(out, visited)

        # A one-element tuple needs a trailing comma: "(x,)"
        out.write(',)' if self.field('ob_size') == 1 else ')')
1157
# Wrapper for a gdb.Value that is a PyTypeObject*. It adds no behaviour of
# its own: it only overrides _typename and inherits everything else from
# PyObjectPtr.
class PyTypeObjectPtr(PyObjectPtr):
    _typename = 'PyTypeObject'
1160
1161
1162def _unichr_is_printable(char):
1163    # Logic adapted from Python 3's Tools/unicode/makeunicodedata.py
1164    if char == u" ":
1165        return True
1166    import unicodedata
1167    return unicodedata.category(char) not in ("C", "Z")
1168
if sys.maxunicode < 0x10000:
    # Needed for proper surrogate support if sizeof(Py_UNICODE) is 2 in gdb
    def _unichr(x):
        # Code points inside the BMP map to a single character
        if x < 0x10000:
            return unichr(x)
        # Anything above is split into a high/low surrogate pair
        offset = x - 0x10000
        high = 0xD800 | (offset >> 10)
        low = 0xDC00 | (offset & 0x3FF)
        return unichr(high) + unichr(low)
else:
    # unichr() covers the whole code point range here: use it directly
    _unichr = unichr
1180
1181
class PyUnicodeObjectPtr(PyObjectPtr):
    _typename = 'PyUnicodeObject'

    def char_width(self):
        '''Return the size of the inferior's Py_UNICODE type, as reported
        by gdb'''
        _type_Py_UNICODE = gdb.lookup_type('Py_UNICODE')
        return _type_Py_UNICODE.sizeof

    def proxyval(self, visited):
        '''Return a unicode string built from the inferior's string data.

        The layout is detected once (and cached in the module-level
        _is_pep393 flag) by checking whether PyUnicodeObject has a "data"
        field. Code points above 0x10ffff are replaced by U+FFFD.
        '''
        global _is_pep393
        if _is_pep393 is None:
            fields = gdb.lookup_type('PyUnicodeObject').fields()
            _is_pep393 = 'data' in [f.name for f in fields]
        if _is_pep393:
            # Python 3.3 and newer
            may_have_surrogates = False
            compact = self.field('_base')
            ascii = compact['_base']
            state = ascii['state']
            is_compact_ascii = (int(state['ascii']) and int(state['compact']))
            if not int(state['ready']):
                # string is not ready
                field_length = long(compact['wstr_length'])
                may_have_surrogates = True
                field_str = ascii['wstr']
            else:
                field_length = long(ascii['length'])
                if is_compact_ascii:
                    field_str = ascii.address + 1
                elif int(state['compact']):
                    field_str = compact.address + 1
                else:
                    field_str = self.field('data')['any']
                # Reinterpret the data pointer according to the width
                # recorded in state.kind (1, 2 or 4)
                repr_kind = int(state['kind'])
                if repr_kind == 1:
                    field_str = field_str.cast(_type_unsigned_char_ptr())
                elif repr_kind == 2:
                    field_str = field_str.cast(_type_unsigned_short_ptr())
                elif repr_kind == 4:
                    field_str = field_str.cast(_type_unsigned_int_ptr())
        else:
            # Python 3.2 and earlier
            field_length = long(self.field('length'))
            field_str = self.field('str')
            may_have_surrogates = self.char_width() == 2

        # Gather a list of ints from the Py_UNICODE array; these are either
        # UCS-1, UCS-2 or UCS-4 code points:
        if not may_have_surrogates:
            Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)]
        else:
            # A more elaborate routine if sizeof(Py_UNICODE) is 2 in the
            # inferior process: we must join surrogate pairs.
            Py_UNICODEs = []
            i = 0
            limit = safety_limit(field_length)
            while i < limit:
                ucs = int(field_str[i])
                i += 1
                if ucs < 0xD800 or ucs >= 0xDC00 or i == field_length:
                    Py_UNICODEs.append(ucs)
                    continue
                # This could be a surrogate pair.
                ucs2 = int(field_str[i])
                if ucs2 < 0xDC00 or ucs2 > 0xDFFF:
                    continue
                code = (ucs & 0x03FF) << 10
                code |= ucs2 & 0x03FF
                code += 0x00010000
                Py_UNICODEs.append(code)
                i += 1

        # Convert the int code points to unicode characters, and generate a
        # local unicode instance.
        # This splits surrogate pairs if sizeof(Py_UNICODE) is 2 here (in gdb).
        # The replacement character needs the u'' prefix: without it a
        # Python 2 gdb would insert the six characters "\ufffd" verbatim.
        result = u''.join([
            (_unichr(ucs) if ucs <= 0x10ffff else u'\ufffd')
            for ucs in Py_UNICODEs])
        return result

    def write_repr(self, out, visited):
        '''Write this string to *out* as a Python 3 str literal, i.e.
        without a "u" prefix, escaping quotes, backslashes, control
        characters and anything non-printable or not encodable in ENCODING.
        '''
        # Get a PyUnicodeObject* within the Python 2 gdb process:
        proxy = self.proxyval(visited)

        # Transliteration of Python 3's Object/unicodeobject.c:unicode_repr
        # to Python 2:
        if "'" in proxy and '"' not in proxy:
            quote = '"'
        else:
            quote = "'"
        out.write(quote)

        i = 0
        while i < len(proxy):
            ch = proxy[i]
            i += 1

            # Escape quotes and backslashes
            if ch == quote or ch == '\\':
                out.write('\\')
                out.write(ch)

            #  Map special whitespace to '\t', \n', '\r'
            elif ch == '\t':
                out.write('\\t')
            elif ch == '\n':
                out.write('\\n')
            elif ch == '\r':
                out.write('\\r')

            # Map non-printable US ASCII to '\xhh'.
            # ch is a one-character string, so DEL has to be detected via
            # its ordinal: comparing ch itself to 0x7F is never true.
            elif ch < ' ' or ord(ch) == 0x7F:
                out.write('\\x')
                out.write(hexdigits[(ord(ch) >> 4) & 0x000F])
                out.write(hexdigits[ord(ch) & 0x000F])

            # Copy ASCII characters as-is
            elif ord(ch) < 0x7F:
                out.write(ch)

            # Non-ASCII characters
            else:
                ucs = ch
                ch2 = None
                if sys.maxunicode < 0x10000:
                    # If sizeof(Py_UNICODE) is 2 here (in gdb), join
                    # surrogate pairs before calling _unichr_is_printable.
                    if (i < len(proxy)
                            and 0xD800 <= ord(ch) < 0xDC00
                            and 0xDC00 <= ord(proxy[i]) <= 0xDFFF):
                        ch2 = proxy[i]
                        ucs = ch + ch2
                        i += 1

                # Unfortunately, Python 2's unicode type doesn't seem
                # to expose the "isprintable" method
                printable = _unichr_is_printable(ucs)
                if printable:
                    try:
                        ucs.encode(ENCODING)
                    except UnicodeEncodeError:
                        printable = False

                # Map Unicode whitespace and control characters
                # (categories Z* and C* except ASCII space)
                if not printable:
                    if ch2 is not None:
                        # Match Python 3's representation of non-printable
                        # wide characters.
                        code = (ord(ch) & 0x03FF) << 10
                        code |= ord(ch2) & 0x03FF
                        code += 0x00010000
                    else:
                        code = ord(ucs)

                    # Map 8-bit characters to '\\xhh'
                    if code <= 0xff:
                        out.write('\\x')
                        out.write(hexdigits[(code >> 4) & 0x000F])
                        out.write(hexdigits[code & 0x000F])
                    # Map 21-bit characters to '\U00xxxxxx'
                    elif code >= 0x10000:
                        out.write('\\U')
                        out.write(hexdigits[(code >> 28) & 0x0000000F])
                        out.write(hexdigits[(code >> 24) & 0x0000000F])
                        out.write(hexdigits[(code >> 20) & 0x0000000F])
                        out.write(hexdigits[(code >> 16) & 0x0000000F])
                        out.write(hexdigits[(code >> 12) & 0x0000000F])
                        out.write(hexdigits[(code >> 8) & 0x0000000F])
                        out.write(hexdigits[(code >> 4) & 0x0000000F])
                        out.write(hexdigits[code & 0x0000000F])
                    # Map 16-bit characters to '\uxxxx'
                    else:
                        out.write('\\u')
                        out.write(hexdigits[(code >> 12) & 0x000F])
                        out.write(hexdigits[(code >> 8) & 0x000F])
                        out.write(hexdigits[(code >> 4) & 0x000F])
                        out.write(hexdigits[code & 0x000F])
                else:
                    # Copy characters as-is
                    out.write(ch)
                    if ch2 is not None:
                        out.write(ch2)

        out.write(quote)
1368
1369
class wrapperobject(PyObjectPtr):
    _typename = 'wrapperobject'

    def safe_name(self):
        # repr() of descr->d_base.name, or a placeholder if it can't be read
        try:
            descr_base = self.field('descr')['d_base']
            return repr(descr_base['name'].string())
        except (NullPyObjectPtr, RuntimeError, UnicodeDecodeError):
            return '<unknown name>'

    def safe_tp_name(self):
        # self->ob_type->tp_name, or a placeholder if it can't be read
        try:
            ob_type = self.field('self')['ob_type']
            return ob_type['tp_name'].string()
        except (NullPyObjectPtr, RuntimeError, UnicodeDecodeError):
            return '<unknown tp_name>'

    def safe_self_addresss(self):
        # Hex address of the wrapped "self", or a placeholder on failure
        try:
            return '%#x' % long(self.field('self'))
        except (NullPyObjectPtr, RuntimeError):
            return '<failed to get self address>'

    def proxyval(self, visited):
        # The three pieces are gathered in the order name, type name,
        # address, then assembled into the method-wrapper description.
        pieces = (self.safe_name(),
                  self.safe_tp_name(),
                  self.safe_self_addresss())
        return "<method-wrapper %s of %s object at %s>" % pieces

    def write_repr(self, out, visited):
        out.write(self.proxyval(visited))
1403
1404
def int_from_int(gdbval):
    """Convert *gdbval* to an int by passing it to int()."""
    converted = int(gdbval)
    return converted
1407
1408
def stringify(val, pretty=False):
    """Return a string rendering of *val*.

    repr() puts everything on one line; pformat can be nicer, but can lead
    to very long results. This function isolates the choice: the default
    (pretty=False) uses repr(), and pretty=True selects pprint.pformat().
    The alternative used to sit behind a hard-coded "if True:" and could
    not be reached without editing the source.
    """
    if pretty:
        from pprint import pformat
        return pformat(val)
    return repr(val)
1417
1418
class PyObjectPtrPrinter:
    "Pretty-printer for a (PyObject*)"

    def __init__(self, gdbval):
        self.gdbval = gdbval

    def to_string(self):
        pyop = PyObjectPtr.from_pyobject_ptr(self.gdbval)

        # Hard-wired choice between the two renderings; the truncated repr
        # is the one in use.
        use_truncated_repr = True
        if not use_truncated_repr:
            # Generate full proxy value then stringify it.
            # Doing so could be expensive
            return stringify(pyop.proxyval(set()))
        return pyop.get_truncated_repr(MAX_OUTPUT_LEN)
1434
def pretty_printer_lookup(gdbval):
    """Return a PyObjectPtrPrinter for *gdbval*, or None.

    A printer is only returned when the unqualified type of *gdbval* is a
    pointer whose unqualified target type is named PyObject, PyFrameObject,
    PyUnicodeObject or wrapperobject.
    """
    value_type = gdbval.type.unqualified()
    if value_type.code == gdb.TYPE_CODE_PTR:
        target_name = str(value_type.target().unqualified())
        if target_name in ("PyObject", "PyFrameObject", "PyUnicodeObject", "wrapperobject"):
            return PyObjectPtrPrinter(gdbval)
    return None
1444
1445"""
1446During development, I've been manually invoking the code in this way:
1447(gdb) python
1448
1449import sys
1450sys.path.append('/home/david/coding/python-gdb')
1451import libpython
1452end
1453
1454then reloading it after each edit like this:
1455(gdb) python reload(libpython)
1456
1457The following code should ensure that the prettyprinter is registered
1458if the code is autoloaded by gdb when visiting libpython.so, provided
1459that this python file is installed to the same path as the library (or its
1460.debug file) plus a "-gdb.py" suffix, e.g:
1461  /usr/lib/libpython2.6.so.1.0-gdb.py
1462  /usr/lib/debug/usr/lib/libpython2.6.so.1.0.debug-gdb.py
1463"""
def register(obj):
    """Append pretty_printer_lookup to the pretty_printers list of *obj*,
    using the gdb module itself when *obj* is None."""
    target = gdb if obj is None else obj

    # Wire up the pretty-printer
    target.pretty_printers.append(pretty_printer_lookup)

register(gdb.current_objfile())
1472
1473
1474
1475# Unfortunately, the exact API exposed by the gdb module varies somewhat
1476# from build to build
1477# See http://bugs.python.org/issue8279?#msg102276
1478
class Frame(object):
    '''
    Wrapper for gdb.Frame, adding various methods
    '''
    def __init__(self, gdbframe):
        self._gdbframe = gdbframe

    def older(self):
        '''Return a Frame wrapping the next older gdb frame, or None if
        there is none'''
        older = self._gdbframe.older()
        if older:
            return Frame(older)
        else:
            return None

    def newer(self):
        '''Return a Frame wrapping the next newer gdb frame, or None if
        there is none'''
        newer = self._gdbframe.newer()
        if newer:
            return Frame(newer)
        else:
            return None

    def select(self):
        '''If supported, select this frame and return True; return False if unsupported

        Not all builds have a gdb.Frame.select method; seems to be present on Fedora 12
        onwards, but absent on Ubuntu buildbot'''
        if not hasattr(self._gdbframe, 'select'):
            print ('Unable to select frame: '
                   'this build of gdb does not expose a gdb.Frame.select method')
            return False
        self._gdbframe.select()
        return True

    def get_index(self):
        '''Calculate index of frame, starting at 0 for the newest frame within
        this thread'''
        index = 0
        # Go down until you reach the newest frame. newer() is called
        # exactly once per step instead of once for the loop test and once
        # more to advance.
        iter_frame = self.newer()
        while iter_frame:
            index += 1
            iter_frame = iter_frame.newer()
        return index

    # We divide frames into:
    #   - "python frames":
    #       - "bytecode frames" i.e. PyEval_EvalFrameEx
    #       - "other python frames": things that are of interest from a python
    #         POV, but aren't bytecode (e.g. GC, GIL)
    #   - everything else

    def is_python_frame(self):
        '''Is this a _PyEval_EvalFrameDefault frame, or some other important
        frame? (see is_other_python_frame for what "important" means in this
        context)'''
        if self.is_evalframe():
            return True
        if self.is_other_python_frame():
            return True
        return False

    def is_evalframe(self):
        '''Is this a _PyEval_EvalFrameDefault frame?'''
        if self._gdbframe.name() == EVALFRAME:
            # I believe we also need to filter on the inline
            # struct frame_id.inline_depth, only regarding frames with
            # an inline depth of 0 as actually being this function
            #
            # So we reject those with type gdb.INLINE_FRAME
            if self._gdbframe.type() == gdb.NORMAL_FRAME:
                # We have a _PyEval_EvalFrameDefault frame:
                return True

        return False

    def is_other_python_frame(self):
        '''Is this frame worth displaying in python backtraces?
        Examples:
          - waiting on the GIL
          - garbage-collecting
          - within a CFunction
         If it is, return a descriptive string
         For other frames, return False
         '''
        if self.is_waiting_for_gil():
            return 'Waiting for the GIL'

        if self.is_gc_collect():
            return 'Garbage-collecting'

        # Detect invocations of PyCFunction instances:
        frame = self._gdbframe
        caller = frame.name()
        if not caller:
            return False

        if (caller.startswith('cfunction_vectorcall_') or
            caller == 'cfunction_call'):
            arg_name = 'func'
            # Within that frame:
            #   "func" is the local containing the PyObject* of the
            # PyCFunctionObject instance
            #   "f" is the same value, but cast to (PyCFunctionObject*)
            #   "self" is the (PyObject*) of the 'self'
            try:
                # Use the prettyprinter for the func:
                func = frame.read_var(arg_name)
                return str(func)
            except ValueError:
                return ('PyCFunction invocation (unable to read %s: '
                        'missing debuginfos?)' % arg_name)
            except RuntimeError:
                return 'PyCFunction invocation (unable to read %s)' % arg_name

        if caller == 'wrapper_call':
            arg_name = 'wp'
            try:
                func = frame.read_var(arg_name)
                return str(func)
            except ValueError:
                return ('<wrapper_call invocation (unable to read %s: '
                        'missing debuginfos?)>' % arg_name)
            except RuntimeError:
                return '<wrapper_call invocation (unable to read %s)>' % arg_name

        # This frame isn't worth reporting:
        return False

    def is_waiting_for_gil(self):
        '''Is this frame waiting on the GIL?

        Always returns a bool; a frame without a name yields False.'''
        # This assumes the _POSIX_THREADS version of Python/ceval_gil.h:
        return self._gdbframe.name() == 'take_gil'

    def is_gc_collect(self):
        '''Is this frame gc_collect_main() within the garbage-collector?'''
        return self._gdbframe.name() in ('collect', 'gc_collect_main')

    def get_pyop(self):
        '''Return a PyFrameObjectPtr for the "f" variable of this frame.

        If "f" is optimized out here, the "f" of the next older frame is
        tried; when that does not help either, the optimized-out wrapper
        of this frame is returned. Returns None when read_var raises
        ValueError.'''
        try:
            f = self._gdbframe.read_var('f')
            frame = PyFrameObjectPtr.from_pyobject_ptr(f)
            if not frame.is_optimized_out():
                return frame
            # gdb is unable to get the "f" argument of PyEval_EvalFrameEx()
            # because it was "optimized out". Try to get "f" from the frame
            # of the caller, PyEval_EvalCodeEx().
            orig_frame = frame
            caller = self._gdbframe.older()
            if caller:
                f = caller.read_var('f')
                frame = PyFrameObjectPtr.from_pyobject_ptr(f)
                if not frame.is_optimized_out():
                    return frame
            return orig_frame
        except ValueError:
            return None

    @classmethod
    def get_selected_frame(cls):
        '''Return a Frame wrapping gdb's selected frame, or None if gdb
        returns a false value'''
        _gdbframe = gdb.selected_frame()
        if _gdbframe:
            return Frame(_gdbframe)
        return None

    @classmethod
    def get_selected_python_frame(cls):
        '''Try to obtain the Frame for the python-related code in the selected
        frame, or None'''
        try:
            frame = cls.get_selected_frame()
        except gdb.error:
            # No frame: Python didn't start yet
            return None

        while frame:
            if frame.is_python_frame():
                return frame
            frame = frame.older()

        # Not found:
        return None

    @classmethod
    def get_selected_bytecode_frame(cls):
        '''Try to obtain the Frame for the python bytecode interpreter in the
        selected GDB frame, or None'''
        try:
            frame = cls.get_selected_frame()
        except gdb.error:
            # No frame: Python didn't start yet. Handled the same way as in
            # get_selected_python_frame so callers get None, not an error.
            return None

        while frame:
            if frame.is_evalframe():
                return frame
            frame = frame.older()

        # Not found:
        return None

    def print_summary(self):
        '''Write a one-line "#index ..." summary of this frame to stdout,
        followed by the current source line for readable bytecode frames'''
        if self.is_evalframe():
            pyop = self.get_pyop()
            if pyop:
                line = pyop.get_truncated_repr(MAX_OUTPUT_LEN)
                write_unicode(sys.stdout, '#%i %s\n' % (self.get_index(), line))
                if not pyop.is_optimized_out():
                    line = pyop.current_line()
                    if line is not None:
                        sys.stdout.write('    %s\n' % line.strip())
            else:
                sys.stdout.write('#%i (unable to read python frame information)\n' % self.get_index())
        else:
            info = self.is_other_python_frame()
            if info:
                sys.stdout.write('#%i %s\n' % (self.get_index(), info))
            else:
                sys.stdout.write('#%i\n' % self.get_index())

    def print_traceback(self):
        '''Write this frame to stdout in traceback style, followed by the
        current source line for readable bytecode frames'''
        if self.is_evalframe():
            pyop = self.get_pyop()
            if pyop:
                pyop.print_traceback()
                if not pyop.is_optimized_out():
                    line = pyop.current_line()
                    if line is not None:
                        sys.stdout.write('    %s\n' % line.strip())
            else:
                sys.stdout.write('  (unable to read python frame information)\n')
        else:
            info = self.is_other_python_frame()
            if info:
                sys.stdout.write('  %s\n' % info)
            else:
                sys.stdout.write('  (not a python frame)\n')
1715
class PyList(gdb.Command):
    '''List the current Python source code, if any

    Use
       py-list START
    to list at a different line number within the python source.

    Use
       py-list START, END
    to list a specific range of lines within the python source.
    '''

    def __init__(self):
        # Register this command with gdb under the name "py-list".
        gdb.Command.__init__ (self,
                              "py-list",
                              gdb.COMMAND_FILES,
                              gdb.COMPLETE_NONE)


    def invoke(self, args, from_tty):
        '''Print a window of the python source file of the selected bytecode
        frame to sys.stdout, marking the current line with ">".

        args is the raw argument string: empty (list lineno-5 .. lineno+5),
        "START" (list START .. START+10) or "START, END" (list START .. END).
        All ranges are 1-based and inclusive.  from_tty is not used.
        '''
        import re

        start = None
        end = None

        # A single pattern covers both "START" and "START, END": the second
        # number is optional.  Read the captured digits (group 1/2) rather
        # than the whole match, which also contains the surrounding
        # whitespace.
        m = re.match(r'\s*(\d+)\s*(?:,\s*(\d+)\s*)?', args)
        if m:
            start = int(m.group(1))
            if m.group(2) is not None:
                end = int(m.group(2))
            else:
                end = start + 10

        # py-list requires an actual PyEval_EvalFrameEx frame:
        frame = Frame.get_selected_bytecode_frame()
        if not frame:
            print('Unable to locate gdb frame for python bytecode interpreter')
            return

        pyop = frame.get_pyop()
        if not pyop or pyop.is_optimized_out():
            print(UNABLE_READ_INFO_PYTHON_FRAME)
            return

        filename = pyop.filename()
        lineno = pyop.current_line_num()
        if lineno is None:
            print('Unable to read python frame line number')
            return

        if start is None:
            # No explicit range: centre the listing on the current line.
            start = lineno - 5
            end = lineno + 5

        if start < 1:
            start = 1

        try:
            f = open(os_fsencode(filename), 'r')
        except IOError as err:
            sys.stdout.write('Unable to open %s: %s\n'
                             % (filename, err))
            return
        with f:
            # start and end are 1-based and form a closed interval.  Stream
            # the file and stop as soon as we are past END, so that only the
            # needed prefix of the file is read.
            for num, line in enumerate(f, 1):
                if num > end:
                    break
                if num < start:
                    continue
                linestr = str(num)
                # Highlight current line:
                if num == lineno:
                    linestr = '>' + linestr
                sys.stdout.write('%4s    %s' % (linestr, line))


# ...and register the command:
PyList()
1795
def move_in_stack(move_up):
    '''Move up or down the stack (for the py-up/py-down command)

    Starting from the selected python frame, step through older frames (when
    move_up is true) or newer frames (when it is false) until a python frame
    is found.  That frame is selected and, if select() reports success, its
    summary is printed.  A message is printed instead when there is no
    python frame to start from or none is found in the requested direction.
    '''
    current = Frame.get_selected_python_frame()
    if not current:
        print('Unable to locate python frame')
        return

    while True:
        candidate = current.older() if move_up else current.newer()
        if not candidate:
            # Ran off the end of the stack without finding a python frame.
            break

        if candidate.is_python_frame():
            # Result:
            if candidate.select():
                candidate.print_summary()
            return

        current = candidate

    if move_up:
        print('Unable to find an older python frame')
    else:
        print('Unable to find a newer python frame')
1824
class PyUp(gdb.Command):
    'Select and print the python stack frame that called this one (if any)'

    def __init__(self):
        # Register this command with gdb under the name "py-up".
        super(PyUp, self).__init__("py-up",
                                   gdb.COMMAND_STACK,
                                   gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        '''Move the selection to the next older python frame.  args and
        from_tty are not used.'''
        move_in_stack(True)
1836
class PyDown(gdb.Command):
    'Select and print the python stack frame called by this one (if any)'

    def __init__(self):
        # Register this command with gdb under the name "py-down".
        super(PyDown, self).__init__("py-down",
                                     gdb.COMMAND_STACK,
                                     gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        '''Move the selection to the next newer python frame.  args and
        from_tty are not used.'''
        move_in_stack(False)


# py-up and py-down select frames via gdb.Frame.select, which not all builds
# of gdb have; only register the two commands when it is available.
if hasattr(gdb.Frame, 'select'):
    PyUp()
    PyDown()
1853
class PyBacktraceFull(gdb.Command):
    'Display the current python frame and all the frames within its call stack (if any)'

    def __init__(self):
        # Register this command with gdb under the name "py-bt-full".
        super(PyBacktraceFull, self).__init__("py-bt-full",
                                              gdb.COMMAND_STACK,
                                              gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        '''Print the summary of the selected python frame and of every older
        python frame.  args and from_tty are not used.'''
        current = Frame.get_selected_python_frame()
        if not current:
            print('Unable to locate python frame')
            return

        # Walk towards the outermost frame, skipping anything that is not a
        # python frame.
        while current:
            if current.is_python_frame():
                current.print_summary()
            current = current.older()


PyBacktraceFull()
1875
class PyBacktrace(gdb.Command):
    'Display the current python frame and all the frames within its call stack (if any)'

    def __init__(self):
        # Register this command with gdb under the name "py-bt".
        super(PyBacktrace, self).__init__("py-bt",
                                          gdb.COMMAND_STACK,
                                          gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        '''Print a traceback-style listing, most recent call first, of the
        selected python frame and every older python frame.  args and
        from_tty are not used.'''
        current = Frame.get_selected_python_frame()
        if not current:
            print('Unable to locate python frame')
            return

        sys.stdout.write('Traceback (most recent call first):\n')

        # Walk towards the outermost frame, skipping anything that is not a
        # python frame.
        while current:
            if current.is_python_frame():
                current.print_traceback()
            current = current.older()


PyBacktrace()
1898
class PyPrint(gdb.Command):
    'Look up the given python variable name, and print it'

    def __init__(self):
        # Register this command with gdb under the name "py-print".
        super(PyPrint, self).__init__("py-print",
                                      gdb.COMMAND_DATA,
                                      gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        '''Look up the variable named by args in the selected python frame
        and print its scope, name and truncated repr, or a "not found"
        message.  from_tty is not used.'''
        varname = str(args)

        selected = Frame.get_selected_python_frame()
        if not selected:
            print('Unable to locate python frame')
            return

        pyop_frame = selected.get_pyop()
        if not pyop_frame:
            print(UNABLE_READ_INFO_PYTHON_FRAME)
            return

        pyop_var, scope = pyop_frame.get_var_by_name(varname)
        if not pyop_var:
            print('%r not found' % varname)
            return

        value_repr = pyop_var.get_truncated_repr(MAX_OUTPUT_LEN)
        print('%s %r = %s' % (scope, varname, value_repr))


PyPrint()
1932
class PyLocals(gdb.Command):
    'Print the local variables of the selected python frame (if any)'
    # The class docstring is the command's help text in gdb, so it has to
    # describe py-locals itself (it used to repeat the py-print description).

    def __init__(self):
        # Register this command with gdb under the name "py-locals".
        gdb.Command.__init__ (self,
                              "py-locals",
                              gdb.COMMAND_DATA,
                              gdb.COMPLETE_NONE)


    def invoke(self, args, from_tty):
        '''Print "name = value" for each local variable of the selected
        python frame, using the truncated repr of the value.  args and
        from_tty are not used.'''
        frame = Frame.get_selected_python_frame()
        if not frame:
            print('Unable to locate python frame')
            return

        pyop_frame = frame.get_pyop()
        if not pyop_frame:
            print(UNABLE_READ_INFO_PYTHON_FRAME)
            return

        for pyop_name, pyop_value in pyop_frame.iter_locals():
            print('%s = %s'
                   % (pyop_name.proxyval(set()),
                      pyop_value.get_truncated_repr(MAX_OUTPUT_LEN)))

PyLocals()
1961