• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/python
2'''
3From gdb 7 onwards, gdb's build can be configured --with-python, allowing gdb
4to be extended with Python code e.g. for library-specific data visualizations,
5such as for the C++ STL types.  Documentation on this API can be seen at:
6http://sourceware.org/gdb/current/onlinedocs/gdb/Python-API.html
7
8
9This python module deals with the case when the process being debugged (the
10"inferior process" in gdb parlance) is itself python, or more specifically,
11linked against libpython.  In this situation, almost every item of data is a
12(PyObject*), and having the debugger merely print their addresses is not very
13enlightening.
14
15This module embeds knowledge about the implementation details of libpython so
16that we can emit useful visualizations e.g. a string, a list, a dict, a frame
17giving file/line information and the state of local variables
18
19In particular, given a gdb.Value corresponding to a PyObject* in the inferior
20process, we can generate a "proxy value" within the gdb process.  For example,
21given a PyObject* in the inferior process that is in fact a PyListObject*
22holding three PyObject* that turn out to be PyBytesObject* instances, we can
23generate a proxy value within the gdb process that is a list of bytes
24instances:
25  [b"foo", b"bar", b"baz"]
26
27Doing so can be expensive for complicated graphs of objects, and could take
28some time, so we also have a "write_repr" method that writes a representation
29of the data to a file-like object.  This allows us to stop the traversal by
30having the file-like object raise an exception if it gets too much data.
31
32With both "proxyval" and "write_repr" we keep track of the set of all addresses
33visited so far in the traversal, to avoid infinite recursion due to cycles in
34the graph of object references.
35
36We try to defer gdb.lookup_type() invocations for python types until as late as
37possible: for a dynamically linked python binary, when the process starts in
38the debugger, the libpython.so hasn't been dynamically loaded yet, so none of
39the type names are known to the debugger
40
41The module also extends gdb with some python-specific commands.
42'''
43
44# NOTE: some gdbs are linked with Python 3, so this file should be dual-syntax
45# compatible (2.6+ and 3.0+).  See #19308.
46
47from __future__ import print_function
48import gdb
49import os
50import locale
51import sys
52
# When running under Python 3, bind the Python 2 builtin names to their
# Python 3 equivalents so the rest of the module can use the Python 2
# spellings (e.g. xrange in safe_range(), long in PyObjectPtr.is_null()).
if sys.version_info[0] >= 3:
    unichr = chr
    xrange = range
    long = int
57
58# Look up the gdb.Type for some standard types:
59# Those need to be refreshed as types (pointer sizes) may change when
60# gdb loads different executables
61
def _type_char_ptr():
    '''Look up and return the gdb.Type for (char*).'''
    char_type = gdb.lookup_type('char')
    return char_type.pointer()
64
65
def _type_unsigned_char_ptr():
    '''Look up and return the gdb.Type for (unsigned char*).'''
    uchar_type = gdb.lookup_type('unsigned char')
    return uchar_type.pointer()
68
69
def _type_unsigned_short_ptr():
    '''Look up and return the gdb.Type for (unsigned short*).'''
    ushort_type = gdb.lookup_type('unsigned short')
    return ushort_type.pointer()
72
73
def _type_unsigned_int_ptr():
    '''Look up and return the gdb.Type for (unsigned int*).'''
    uint_type = gdb.lookup_type('unsigned int')
    return uint_type.pointer()
76
77
def _sizeof_void_p():
    '''Return the "sizeof" attribute gdb reports for the (void*) type.'''
    void_ptr_type = gdb.lookup_type('void').pointer()
    return void_ptr_type.sizeof
80
81
82# value computed later, see PyUnicodeObjectPtr.proxy()
83_is_pep393 = None
84
# Bit masks tested against a type object's "tp_flags" field by
# PyObjectPtr.subclass_from_type() to pick the wrapper class to use.
# NOTE(review): presumably these mirror the Py_TPFLAGS_* values in the
# inferior's CPython headers -- confirm when targeting a new Python version.
Py_TPFLAGS_HEAPTYPE = (1 << 9)
Py_TPFLAGS_LONG_SUBCLASS     = (1 << 24)
Py_TPFLAGS_LIST_SUBCLASS     = (1 << 25)
Py_TPFLAGS_TUPLE_SUBCLASS    = (1 << 26)
Py_TPFLAGS_BYTES_SUBCLASS    = (1 << 27)
Py_TPFLAGS_UNICODE_SUBCLASS  = (1 << 28)
Py_TPFLAGS_DICT_SUBCLASS     = (1 << 29)
Py_TPFLAGS_BASE_EXC_SUBCLASS = (1 << 30)
Py_TPFLAGS_TYPE_SUBCLASS     = (1 << 31)
94
95
96MAX_OUTPUT_LEN=1024
97
98hexdigits = "0123456789abcdef"
99
100ENCODING = locale.getpreferredencoding()
101
class NullPyObjectPtr(RuntimeError):
    '''Raised by PyObjectPtr.field() when the wrapped pointer is NULL.'''
104
105
def safety_limit(val):
    '''
    Cap a value read from the process being debugged at 1000.

    The inferior may be arbitrarily broken, so sizes scraped from it (loop
    counts, list lengths, ...) are bounded to keep gdb itself responsive.
    Values at or below the cap are returned unchanged.
    '''
    ceiling = 1000
    if ceiling < val:
        return ceiling
    return val
111
112
def safe_range(val):
    '''
    Like xrange(val), but with the upper bound passed through safety_limit()
    first, since the value may come from corrupted data in the inferior.
    '''
    capped = safety_limit(int(val))
    return xrange(capped)
117
if sys.version_info[0] < 3:
    def write_unicode(file, text):
        '''
        Write a byte or unicode string to "file".

        Unicode strings are first encoded to ENCODING using the
        'backslashreplace' error handler, so that unencodable characters
        cannot trigger a UnicodeEncodeError.
        '''
        if isinstance(text, unicode):
            payload = text.encode(ENCODING, 'backslashreplace')
        else:
            payload = text
        file.write(payload)
else:
    def write_unicode(file, text):
        '''Write "text" to "file" unchanged.'''
        file.write(text)
129
if hasattr(os, 'fsencode'):
    # Use the standard library implementation when it exists.
    os_fsencode = os.fsencode
else:
    def os_fsencode(filename):
        '''
        Fallback used when os.fsencode is unavailable: encode a unicode
        filename with the filesystem encoding, emulating the
        "surrogateescape" error handler.  Non-unicode input is returned
        untouched.
        '''
        if not isinstance(filename, unicode):
            return filename

        fs_encoding = sys.getfilesystemencoding()
        if fs_encoding == 'mbcs':
            # mbcs doesn't support surrogateescape
            return filename.encode(fs_encoding)

        def _encode_char(char):
            codepoint = ord(char)
            # surrogateescape error handler: map U+DC80..U+DCFF back to the
            # raw byte it stands for
            if 0xDC80 <= codepoint <= 0xDCFF:
                return chr(codepoint - 0xDC00)
            return char.encode(fs_encoding)

        return ''.join([_encode_char(char) for char in filename])
149
class StringTruncated(RuntimeError):
    '''Raised by TruncatedStringIO.write() once its length limit is hit.'''
152
class TruncatedStringIO(object):
    '''
    Minimal write-only string buffer in the spirit of io.StringIO.

    When constructed with a truthy "maxlen", a write that would push the
    buffer past that length stores only the part that still fits and then
    raises StringTruncated.
    '''
    def __init__(self, maxlen=None):
        self._val = ''
        self.maxlen = maxlen

    def write(self, data):
        '''Append "data", raising StringTruncated if the limit is exceeded.'''
        limit = self.maxlen
        if limit and len(data) + len(self._val) > limit:
            # Truncation: keep whatever still fits, then signal the caller
            room = limit - len(self._val)
            self._val = self._val + data[0:room]
            raise StringTruncated()

        self._val = self._val + data

    def getvalue(self):
        '''Return everything accumulated so far.'''
        return self._val
171
172class PyObjectPtr(object):
173    """
174    Class wrapping a gdb.Value that's either a (PyObject*) within the
175    inferior process, or some subclass pointer e.g. (PyBytesObject*)
176
177    There will be a subclass for every refined PyObject type that we care
178    about.
179
180    Note that at every stage the underlying pointer could be NULL, point
181    to corrupt data, etc; this is the debugger, after all.
182    """
183    _typename = 'PyObject'
184
185    def __init__(self, gdbval, cast_to=None):
186        if cast_to:
187            self._gdbval = gdbval.cast(cast_to)
188        else:
189            self._gdbval = gdbval
190
191    def field(self, name):
192        '''
193        Get the gdb.Value for the given field within the PyObject, coping with
194        some python 2 versus python 3 differences.
195
196        Various libpython types are defined using the "PyObject_HEAD" and
197        "PyObject_VAR_HEAD" macros.
198
199        In Python 2, this these are defined so that "ob_type" and (for a var
200        object) "ob_size" are fields of the type in question.
201
202        In Python 3, this is defined as an embedded PyVarObject type thus:
203           PyVarObject ob_base;
204        so that the "ob_size" field is located insize the "ob_base" field, and
205        the "ob_type" is most easily accessed by casting back to a (PyObject*).
206        '''
207        if self.is_null():
208            raise NullPyObjectPtr(self)
209
210        if name == 'ob_type':
211            pyo_ptr = self._gdbval.cast(PyObjectPtr.get_gdb_type())
212            return pyo_ptr.dereference()[name]
213
214        if name == 'ob_size':
215            pyo_ptr = self._gdbval.cast(PyVarObjectPtr.get_gdb_type())
216            return pyo_ptr.dereference()[name]
217
218        # General case: look it up inside the object:
219        return self._gdbval.dereference()[name]
220
221    def pyop_field(self, name):
222        '''
223        Get a PyObjectPtr for the given PyObject* field within this PyObject,
224        coping with some python 2 versus python 3 differences.
225        '''
226        return PyObjectPtr.from_pyobject_ptr(self.field(name))
227
228    def write_field_repr(self, name, out, visited):
229        '''
230        Extract the PyObject* field named "name", and write its representation
231        to file-like object "out"
232        '''
233        field_obj = self.pyop_field(name)
234        field_obj.write_repr(out, visited)
235
236    def get_truncated_repr(self, maxlen):
237        '''
238        Get a repr-like string for the data, but truncate it at "maxlen" bytes
239        (ending the object graph traversal as soon as you do)
240        '''
241        out = TruncatedStringIO(maxlen)
242        try:
243            self.write_repr(out, set())
244        except StringTruncated:
245            # Truncation occurred:
246            return out.getvalue() + '...(truncated)'
247
248        # No truncation occurred:
249        return out.getvalue()
250
251    def type(self):
252        return PyTypeObjectPtr(self.field('ob_type'))
253
254    def is_null(self):
255        return 0 == long(self._gdbval)
256
257    def is_optimized_out(self):
258        '''
259        Is the value of the underlying PyObject* visible to the debugger?
260
261        This can vary with the precise version of the compiler used to build
262        Python, and the precise version of gdb.
263
264        See e.g. https://bugzilla.redhat.com/show_bug.cgi?id=556975 with
265        PyEval_EvalFrameEx's "f"
266        '''
267        return self._gdbval.is_optimized_out
268
269    def safe_tp_name(self):
270        try:
271            return self.type().field('tp_name').string()
272        except NullPyObjectPtr:
273            # NULL tp_name?
274            return 'unknown'
275        except RuntimeError:
276            # Can't even read the object at all?
277            return 'unknown'
278
279    def proxyval(self, visited):
280        '''
281        Scrape a value from the inferior process, and try to represent it
282        within the gdb process, whilst (hopefully) avoiding crashes when
283        the remote data is corrupt.
284
285        Derived classes will override this.
286
287        For example, a PyIntObject* with ob_ival 42 in the inferior process
288        should result in an int(42) in this process.
289
290        visited: a set of all gdb.Value pyobject pointers already visited
291        whilst generating this value (to guard against infinite recursion when
292        visiting object graphs with loops).  Analogous to Py_ReprEnter and
293        Py_ReprLeave
294        '''
295
296        class FakeRepr(object):
297            """
298            Class representing a non-descript PyObject* value in the inferior
299            process for when we don't have a custom scraper, intended to have
300            a sane repr().
301            """
302
303            def __init__(self, tp_name, address):
304                self.tp_name = tp_name
305                self.address = address
306
307            def __repr__(self):
308                # For the NULL pointer, we have no way of knowing a type, so
309                # special-case it as per
310                # http://bugs.python.org/issue8032#msg100882
311                if self.address == 0:
312                    return '0x0'
313                return '<%s at remote 0x%x>' % (self.tp_name, self.address)
314
315        return FakeRepr(self.safe_tp_name(),
316                        long(self._gdbval))
317
318    def write_repr(self, out, visited):
319        '''
320        Write a string representation of the value scraped from the inferior
321        process to "out", a file-like object.
322        '''
323        # Default implementation: generate a proxy value and write its repr
324        # However, this could involve a lot of work for complicated objects,
325        # so for derived classes we specialize this
326        return out.write(repr(self.proxyval(visited)))
327
328    @classmethod
329    def subclass_from_type(cls, t):
330        '''
331        Given a PyTypeObjectPtr instance wrapping a gdb.Value that's a
332        (PyTypeObject*), determine the corresponding subclass of PyObjectPtr
333        to use
334
335        Ideally, we would look up the symbols for the global types, but that
336        isn't working yet:
337          (gdb) python print gdb.lookup_symbol('PyList_Type')[0].value
338          Traceback (most recent call last):
339            File "<string>", line 1, in <module>
340          NotImplementedError: Symbol type not yet supported in Python scripts.
341          Error while executing Python code.
342
343        For now, we use tp_flags, after doing some string comparisons on the
344        tp_name for some special-cases that don't seem to be visible through
345        flags
346        '''
347        try:
348            tp_name = t.field('tp_name').string()
349            tp_flags = int(t.field('tp_flags'))
350        except RuntimeError:
351            # Handle any kind of error e.g. NULL ptrs by simply using the base
352            # class
353            return cls
354
355        #print('tp_flags = 0x%08x' % tp_flags)
356        #print('tp_name = %r' % tp_name)
357
358        name_map = {'bool': PyBoolObjectPtr,
359                    'classobj': PyClassObjectPtr,
360                    'NoneType': PyNoneStructPtr,
361                    'frame': PyFrameObjectPtr,
362                    'set' : PySetObjectPtr,
363                    'frozenset' : PySetObjectPtr,
364                    'builtin_function_or_method' : PyCFunctionObjectPtr,
365                    }
366        if tp_name in name_map:
367            return name_map[tp_name]
368
369        if tp_flags & Py_TPFLAGS_HEAPTYPE:
370            return HeapTypeObjectPtr
371
372        if tp_flags & Py_TPFLAGS_LONG_SUBCLASS:
373            return PyLongObjectPtr
374        if tp_flags & Py_TPFLAGS_LIST_SUBCLASS:
375            return PyListObjectPtr
376        if tp_flags & Py_TPFLAGS_TUPLE_SUBCLASS:
377            return PyTupleObjectPtr
378        if tp_flags & Py_TPFLAGS_BYTES_SUBCLASS:
379            return PyBytesObjectPtr
380        if tp_flags & Py_TPFLAGS_UNICODE_SUBCLASS:
381            return PyUnicodeObjectPtr
382        if tp_flags & Py_TPFLAGS_DICT_SUBCLASS:
383            return PyDictObjectPtr
384        if tp_flags & Py_TPFLAGS_BASE_EXC_SUBCLASS:
385            return PyBaseExceptionObjectPtr
386        #if tp_flags & Py_TPFLAGS_TYPE_SUBCLASS:
387        #    return PyTypeObjectPtr
388
389        # Use the base class:
390        return cls
391
392    @classmethod
393    def from_pyobject_ptr(cls, gdbval):
394        '''
395        Try to locate the appropriate derived class dynamically, and cast
396        the pointer accordingly.
397        '''
398        try:
399            p = PyObjectPtr(gdbval)
400            cls = cls.subclass_from_type(p.type())
401            return cls(gdbval, cast_to=cls.get_gdb_type())
402        except RuntimeError:
403            # Handle any kind of error e.g. NULL ptrs by simply using the base
404            # class
405            pass
406        return cls(gdbval)
407
408    @classmethod
409    def get_gdb_type(cls):
410        return gdb.lookup_type(cls._typename).pointer()
411
412    def as_address(self):
413        return long(self._gdbval)
414
class PyVarObjectPtr(PyObjectPtr):
    '''
    Wrapper for a (PyVarObject*).  PyObjectPtr.field() casts to this
    class's gdb type in order to read "ob_size".
    '''
    _typename = 'PyVarObject'
417
class ProxyAlreadyVisited(object):
    '''
    Stand-in proxy value returned when an object is reached a second time
    during a traversal, so that loops in the object graph do not cause
    infinite recursion.

    Comparable to what the users of Py_ReprEnter and Py_ReprLeave emit.
    '''
    def __init__(self, rep):
        # "rep" is the exact text that repr() will produce, e.g. '[...]'
        self._rep = rep

    def __repr__(self):
        placeholder = self._rep
        return placeholder
430
431
def _write_instance_repr(out, visited, name, pyop_attrdict, address):
    '''
    Helper shared by the instance wrappers: write
    "<NAME(attr=value, ...) at remote 0xADDR>" to file-like object "out".

    The parenthesised attribute list is only written when "pyop_attrdict"
    is a PyDictObjectPtr.
    '''
    out.write('<')
    out.write(name)

    # Write dictionary of instance attributes:
    if isinstance(pyop_attrdict, PyDictObjectPtr):
        out.write('(')
        attr_items = pyop_attrdict.iteritems()
        for position, (pyop_key, pyop_value) in enumerate(attr_items):
            if position:
                out.write(', ')
            out.write(pyop_key.proxyval(visited))
            out.write('=')
            pyop_value.write_repr(out, visited)
        out.write(')')

    out.write(' at remote 0x%x>' % address)
451
452
class InstanceProxy(object):
    '''
    Proxy value standing in for an instance of a class in the inferior
    process.

    cl_name: name of the instance's class
    attrdict: proxy of the instance's attribute dictionary; when it is not
        a real dict (e.g. a placeholder proxy) the attributes are left out
        of the repr
    address: address of the object in the inferior process
    '''

    def __init__(self, cl_name, attrdict, address):
        self.cl_name = cl_name
        self.attrdict = attrdict
        self.address = address

    def __repr__(self):
        if isinstance(self.attrdict, dict):
            # dict.items() exists on both Python 2 and Python 3, unlike
            # iteritems(), which raises AttributeError on Python 3.
            kwargs = ', '.join(["%s=%r" % (arg, val)
                                for arg, val in self.attrdict.items()])
            return '<%s(%s) at remote 0x%x>' % (self.cl_name,
                                                kwargs, self.address)
        else:
            return '<%s at remote 0x%x>' % (self.cl_name,
                                            self.address)
469
def _PyObject_VAR_SIZE(typeobj, nitems):
    '''
    Compute tp_basicsize + nitems * tp_itemsize for "typeobj", rounded up
    to a multiple of the pointer size, and return it cast to size_t.

    The gdb.Type for size_t is looked up on first use and cached as an
    attribute of this function.
    '''
    size_t_type = _PyObject_VAR_SIZE._type_size_t
    if size_t_type is None:
        size_t_type = gdb.lookup_type('size_t')
        _PyObject_VAR_SIZE._type_size_t = size_t_type

    unaligned = (typeobj.field('tp_basicsize') +
                 nitems * typeobj.field('tp_itemsize'))
    align_mask = _sizeof_void_p() - 1
    rounded = (unaligned + align_mask) & ~align_mask
    return rounded.cast(size_t_type)
_PyObject_VAR_SIZE._type_size_t = None
480
class HeapTypeObjectPtr(PyObjectPtr):
    '''
    Wrapper chosen by PyObjectPtr.subclass_from_type() for objects whose
    type has Py_TPFLAGS_HEAPTYPE set.
    '''
    _typename = 'PyObject'

    def get_attr_dict(self):
        '''
        Locate the instance's attribute dictionary via the type's
        "tp_dictoffset" and return it wrapped as a PyObjectPtr.

        Returns None when tp_dictoffset is 0 or when reading the inferior
        raises RuntimeError.
        '''
        try:
            type_obj = self.type()
            offset = int_from_int(type_obj.field('tp_dictoffset'))
            if offset == 0:
                # Not found
                return None

            if offset < 0:
                # Negative offsets are relative to the end of the object,
                # whose size depends on abs(ob_size)
                varobj_ptr_type = gdb.lookup_type('PyVarObject').pointer()
                as_varobj = self._gdbval.cast(varobj_ptr_type)
                nitems = int_from_int(as_varobj['ob_size'])
                if nitems < 0:
                    nitems = -nitems
                offset += _PyObject_VAR_SIZE(type_obj, nitems)
                assert offset > 0
                assert offset % _sizeof_void_p() == 0

            # Step "offset" bytes into the object and read the (PyObject*)
            # stored there
            slot_addr = self._gdbval.cast(_type_char_ptr()) + offset
            pyobject_ptr_ptr = PyObjectPtr.get_gdb_type().pointer()
            slot = slot_addr.cast(pyobject_ptr_ptr)
            return PyObjectPtr.from_pyobject_ptr(slot.dereference())
        except RuntimeError:
            # Corrupt data somewhere; fail safe
            return None

    def proxyval(self, visited):
        '''
        Support for classes.

        The dictionary is found with a Python transliteration of
        _PyObject_GetDictPtr; descriptors are ignored.
        '''
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            return ProxyAlreadyVisited('<...>')
        visited.add(address)

        pyop_attr_dict = self.get_attr_dict()
        attr_dict = pyop_attr_dict.proxyval(visited) if pyop_attr_dict else {}
        tp_name = self.safe_tp_name()

        # Class:
        return InstanceProxy(tp_name, attr_dict, long(self._gdbval))

    def write_repr(self, out, visited):
        '''Write the instance's representation to file-like object "out".'''
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            out.write('<...>')
            return
        visited.add(address)

        pyop_attrdict = self.get_attr_dict()
        _write_instance_repr(out, visited, self.safe_tp_name(),
                             pyop_attrdict, self.as_address())
546
class ProxyException(Exception):
    '''
    Proxy value for an exception instance in the inferior process; its
    repr is the type name immediately followed by the repr of the args.
    '''
    def __init__(self, tp_name, args):
        self.tp_name = tp_name
        self.args = args

    def __repr__(self):
        return '{0}{1!r}'.format(self.tp_name, self.args)
554
class PyBaseExceptionObjectPtr(PyObjectPtr):
    """
    Wrapper around a gdb.Value that is a (PyBaseExceptionObject*), i.e. an
    exception instance living in the process being debugged.
    """
    _typename = 'PyBaseExceptionObject'

    def proxyval(self, visited):
        '''Return a ProxyException built from the type name and "args".'''
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            return ProxyAlreadyVisited('(...)')
        visited.add(address)

        arg_proxy = self.pyop_field('args').proxyval(visited)
        return ProxyException(self.safe_tp_name(), arg_proxy)

    def write_repr(self, out, visited):
        '''Write the type name followed by the repr of "args" to "out".'''
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            out.write('(...)')
            return
        visited.add(address)

        out.write(self.safe_tp_name())
        self.write_field_repr('args', out, visited)
580
class PyClassObjectPtr(PyObjectPtr):
    """
    Wrapper around a gdb.Value that is a (PyClassObject*), i.e. a
    <classobj> instance living in the process being debugged.
    """
    _typename = 'PyClassObject'
587
588
class BuiltInFunctionProxy(object):
    '''Proxy value for a built-in function, identified by its ml_name.'''
    def __init__(self, ml_name):
        self.ml_name = ml_name

    def __repr__(self):
        template = "<built-in function %s>"
        return template % self.ml_name
595
class BuiltInMethodProxy(object):
    '''
    Proxy value for a built-in method: its ml_name plus the PyObjectPtr
    for the object it is bound to.
    '''
    def __init__(self, ml_name, pyop_m_self):
        self.ml_name = ml_name
        self.pyop_m_self = pyop_m_self

    def __repr__(self):
        bound_to = self.pyop_m_self
        details = (self.ml_name,
                   bound_to.safe_tp_name(),
                   bound_to.as_address())
        return '<built-in method %s of %s object at remote 0x%x>' % details
607
class PyCFunctionObjectPtr(PyObjectPtr):
    """
    Wrapper around a gdb.Value that is a (PyCFunctionObject*)
    (see Include/methodobject.h and Objects/methodobject.c)
    """
    _typename = 'PyCFunctionObject'

    def proxyval(self, visited):
        '''
        Return a BuiltInFunctionProxy when m_self is NULL, otherwise a
        BuiltInMethodProxy bound to m_self.
        '''
        method_def = self.field('m_ml')  # m_ml is a (PyMethodDef*)
        ml_name = method_def['ml_name'].string()

        pyop_m_self = self.pyop_field('m_self')
        if not pyop_m_self.is_null():
            return BuiltInMethodProxy(ml_name, pyop_m_self)
        return BuiltInFunctionProxy(ml_name)
624
625
class PyCodeObjectPtr(PyObjectPtr):
    """
    Wrapper around a gdb.Value that is a (PyCodeObject*), i.e. a <code>
    instance living in the process being debugged.
    """
    _typename = 'PyCodeObject'

    def addr2line(self, addrq):
        '''
        Map the bytecode offset "addrq" to a source line number.

        Mirrors PyCode_Addr2Line, following the pseudocode in
        Objects/lnotab_notes.txt
        '''
        lnotab = self.pyop_field('co_lnotab').proxyval(set())

        # Start from co_firstlineno as PyCode_Addr2Line does, rather than
        # from 0 as in lnotab_notes.txt:
        lineno = int_from_int(self.field('co_firstlineno'))

        # Consume the table as consecutive (addr_incr, line_incr) pairs; a
        # trailing unpaired entry is ignored
        entries = iter(lnotab)
        addr = 0
        for addr_incr, line_incr in zip(entries, entries):
            addr += ord(addr_incr)
            if addr > addrq:
                break
            lineno += ord(line_incr)
        return lineno
653
654
class PyDictObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyDictObject* i.e. a dict instance
    within the process being debugged.
    """
    _typename = 'PyDictObject'

    def iteritems(self):
        '''
        Yields a sequence of (PyObjectPtr key, PyObjectPtr value) pairs,
        analogous to dict.iteritems()

        Entries whose value pointer is NULL are skipped.
        '''
        keys = self.field('ma_keys')
        values = self.field('ma_values')
        entries, nentries = self._get_entries(keys)
        for i in safe_range(nentries):
            ep = entries[i]
            if long(values):
                # Non-NULL ma_values: the values live in that separate array
                pyop_value = PyObjectPtr.from_pyobject_ptr(values[i])
            else:
                pyop_value = PyObjectPtr.from_pyobject_ptr(ep['me_value'])
            if not pyop_value.is_null():
                pyop_key = PyObjectPtr.from_pyobject_ptr(ep['me_key'])
                yield (pyop_key, pyop_value)

    def proxyval(self, visited):
        '''Return a dict mapping the proxied keys to the proxied values.'''
        # Guard against infinite loops:
        if self.as_address() in visited:
            return ProxyAlreadyVisited('{...}')
        visited.add(self.as_address())

        result = {}
        for pyop_key, pyop_value in self.iteritems():
            proxy_key = pyop_key.proxyval(visited)
            proxy_value = pyop_value.proxyval(visited)
            result[proxy_key] = proxy_value
        return result

    def write_repr(self, out, visited):
        '''Write a "{key: value, ...}" representation to "out".'''
        # Guard against infinite loops:
        if self.as_address() in visited:
            out.write('{...}')
            return
        visited.add(self.as_address())

        out.write('{')
        first = True
        for pyop_key, pyop_value in self.iteritems():
            if not first:
                out.write(', ')
            first = False
            pyop_key.write_repr(out, visited)
            out.write(': ')
            pyop_value.write_repr(out, visited)
        out.write('}')

    def _get_entries(self, keys):
        '''
        Return an (entries, count) pair for the keys object "keys", where
        "entries" can be indexed from 0 to count - 1.

        Two layouts are handled: one with a "dk_entries" member, and one
        where the entries follow the "dk_indices" table.
        '''
        dk_size = int(keys['dk_size'])
        try:
            # <= Python 3.5
            return keys['dk_entries'], dk_size
        except gdb.error:
            # >= Python 3.6
            pass

        # Only read dk_nentries on this path; reading it before the
        # attempt above would make the older layout unreachable if that
        # layout lacks the member.
        dk_nentries = int(keys['dk_nentries'])

        # The index table holds dk_size slots whose width (1, 2, 4 or 8
        # bytes) depends on dk_size; the entries start right after it.
        if dk_size <= 0xFF:
            offset = dk_size
        elif dk_size <= 0xFFFF:
            offset = 2 * dk_size
        elif dk_size <= 0xFFFFFFFF:
            offset = 4 * dk_size
        else:
            offset = 8 * dk_size

        ent_addr = keys['dk_indices']['as_1'].address
        ent_addr = ent_addr.cast(_type_unsigned_char_ptr()) + offset
        ent_ptr_t = gdb.lookup_type('PyDictKeyEntry').pointer()
        ent_addr = ent_addr.cast(ent_ptr_t)

        return ent_addr, dk_nentries
736
737
class PyListObjectPtr(PyObjectPtr):
    '''Wrapper around a gdb.Value that is a (PyListObject*).'''
    _typename = 'PyListObject'

    def __getitem__(self, i):
        '''Return the gdb.Value for the (PyObject*) stored at index "i".'''
        items = self.field('ob_item')
        return items[i]

    def proxyval(self, visited):
        '''Return a list holding the proxy value of each element.'''
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            return ProxyAlreadyVisited('[...]')
        visited.add(address)

        length = int_from_int(self.field('ob_size'))
        return [PyObjectPtr.from_pyobject_ptr(self[index]).proxyval(visited)
                for index in safe_range(length)]

    def write_repr(self, out, visited):
        '''Write a "[elem, ...]" representation to file-like object "out".'''
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            out.write('[...]')
            return
        visited.add(address)

        out.write('[')
        length = int_from_int(self.field('ob_size'))
        for index in safe_range(length):
            if index:
                out.write(', ')
            PyObjectPtr.from_pyobject_ptr(self[index]).write_repr(out, visited)
        out.write(']')
770
class PyLongObjectPtr(PyObjectPtr):
    '''Wrapper around a gdb.Value that is a (PyLongObject*).'''
    _typename = 'PyLongObject'

    def proxyval(self, visited):
        '''
        Rebuild the integer from its digit array.

        Python's Include/longobjrep.h has this declaration:
           struct _longobject {
               PyObject_VAR_HEAD
               digit ob_digit[1];
           };

        with this description:
            The absolute value of a number is equal to
                 SUM(for i=0 through abs(ob_size)-1) ob_digit[i] * 2**(SHIFT*i)
            Negative numbers are represented with ob_size < 0;
            zero is represented by ob_size == 0.

        where SHIFT can be either:
            #define PyLong_SHIFT        30
            #define PyLong_SHIFT        15
        '''
        signed_ndigits = long(self.field('ob_size'))
        if not signed_ndigits:
            return 0

        ob_digit = self.field('ob_digit')

        # A 2-byte "digit" type means 15-bit digits, otherwise 30-bit ones
        shift = 15 if gdb.lookup_type('digit').sizeof == 2 else 30

        magnitude = 0
        for index in safe_range(abs(signed_ndigits)):
            magnitude += long(ob_digit[index]) << (shift * index)

        if signed_ndigits < 0:
            return -magnitude
        return magnitude

    def write_repr(self, out, visited):
        '''Write the value as a Python 3 int literal (no "L" suffix).'''
        out.write("%s" % self.proxyval(visited))
814
815
class PyBoolObjectPtr(PyLongObjectPtr):
    """
    Wrapper around a gdb.Value that is a (PyBoolObject*), i.e. one of the
    two <bool> instances (Py_True/Py_False) in the process being debugged.
    """
    def proxyval(self, visited):
        '''Return True or False according to the underlying integer.'''
        return bool(PyLongObjectPtr.proxyval(self, visited))
826
class PyNoneStructPtr(PyObjectPtr):
    """
    Wrapper around a gdb.Value that is a (PyObject*) pointing at what
    should be the singleton _Py_NoneStruct, whose ob_type is PyNone_Type
    """
    _typename = 'PyObject'

    def proxyval(self, visited):
        '''The proxy value is always None.'''
        return None
836
837
class PyFrameObjectPtr(PyObjectPtr):
    '''
    Class wrapping a gdb.Value that's a (PyFrameObject*) within the inferior
    process, giving access to its code object, line information and
    variables.
    '''
    _typename = 'PyFrameObject'

    def __init__(self, gdbval, cast_to=None):
        PyObjectPtr.__init__(self, gdbval, cast_to)

        # The attributes cached below only exist when the frame is readable;
        # the methods that use them check is_optimized_out() first.
        if not self.is_optimized_out():
            self.co = PyCodeObjectPtr.from_pyobject_ptr(self.field('f_code'))
            self.co_name = self.co.pyop_field('co_name')
            self.co_filename = self.co.pyop_field('co_filename')

            self.f_lineno = int_from_int(self.field('f_lineno'))
            self.f_lasti = int_from_int(self.field('f_lasti'))
            self.co_nlocals = int_from_int(self.co.field('co_nlocals'))
            self.co_varnames = PyTupleObjectPtr.from_pyobject_ptr(self.co.field('co_varnames'))

    def iter_locals(self):
        '''
        Yield a sequence of (name,value) pairs of PyObjectPtr instances, for
        the local variables of this frame

        Locals whose slot holds a NULL pointer are skipped.  Nothing is
        yielded if the frame is optimized out.
        '''
        if self.is_optimized_out():
            return

        f_localsplus = self.field('f_localsplus')
        for i in safe_range(self.co_nlocals):
            pyop_value = PyObjectPtr.from_pyobject_ptr(f_localsplus[i])
            if not pyop_value.is_null():
                pyop_name = PyObjectPtr.from_pyobject_ptr(self.co_varnames[i])
                yield (pyop_name, pyop_value)

    def iter_globals(self):
        '''
        Return an iterable of (name,value) pairs of PyObjectPtr instances,
        for the global variables of this frame

        An empty tuple is returned if the frame is optimized out.
        '''
        if self.is_optimized_out():
            return ()

        pyop_globals = self.pyop_field('f_globals')
        return pyop_globals.iteritems()

    def iter_builtins(self):
        '''
        Return an iterable of (name,value) pairs of PyObjectPtr instances,
        for the builtin variables

        An empty tuple is returned if the frame is optimized out.
        '''
        if self.is_optimized_out():
            return ()

        pyop_builtins = self.pyop_field('f_builtins')
        return pyop_builtins.iteritems()

    def get_var_by_name(self, name):
        '''
        Look for the named local variable, returning a (PyObjectPtr, scope) pair
        where scope is a string 'local', 'global', 'builtin'

        The scopes are searched in that order and the first match wins.

        If not found, return (None, None)
        '''
        # Each scope's iterator is only created once the previous scope has
        # been searched without success.
        scopes = (
            ('local', self.iter_locals),
            ('global', self.iter_globals),
            ('builtin', self.iter_builtins),
        )
        for scope, iter_vars in scopes:
            for pyop_name, pyop_value in iter_vars():
                if name == pyop_name.proxyval(set()):
                    return pyop_value, scope
        return None, None

    def filename(self):
        '''Get the path of the current Python source file, as a string'''
        if self.is_optimized_out():
            return '(frame information optimized out)'
        return self.co_filename.proxyval(set())

    def current_line_num(self):
        '''Get current line number as an integer (1-based)

        Translated from PyFrame_GetLineNumber and PyCode_Addr2Line

        See Objects/lnotab_notes.txt

        Returns None if the frame is optimized out.
        '''
        if self.is_optimized_out():
            return None
        f_trace = self.field('f_trace')
        if long(f_trace) != 0:
            # we have a non-NULL f_trace:
            return self.f_lineno
        return self.co.addr2line(self.f_lasti)

    def current_line(self):
        '''Get the text of the current source line as a string, with a trailing
        newline character

        Returns a placeholder message if the frame is optimized out, and None
        if the source file cannot be opened or does not contain the current
        line number (e.g. the file on disk differs from the one being run).
        '''
        if self.is_optimized_out():
            return '(frame information optimized out)'
        filename = self.filename()
        try:
            f = open(os_fsencode(filename), 'r')
        except IOError:
            return None
        with f:
            all_lines = f.readlines()

        lineno = self.current_line_num()
        # Guard the lookup: a line number below 1 would silently wrap around
        # to the end of the list, and one past the end would raise IndexError.
        if lineno is None or lineno < 1 or lineno > len(all_lines):
            return None
        # Convert from 1-based current_line_num to 0-based list offset:
        return all_lines[lineno - 1]

    def write_repr(self, out, visited):
        '''
        Write a one-line description of this frame to "out": its address,
        file, line, function name and the repr of each local variable.
        '''
        if self.is_optimized_out():
            out.write('(frame information optimized out)')
            return
        out.write('Frame 0x%x, for file %s, line %i, in %s ('
                  % (self.as_address(),
                     self.co_filename.proxyval(visited),
                     self.current_line_num(),
                     self.co_name.proxyval(visited)))
        first = True
        for pyop_name, pyop_value in self.iter_locals():
            if not first:
                out.write(', ')
            first = False

            out.write(pyop_name.proxyval(visited))
            out.write('=')
            pyop_value.write_repr(out, visited)

        out.write(')')

    def print_traceback(self):
        '''
        Write a traceback-style 'File "...", line N, in NAME' entry for this
        frame to sys.stdout.
        '''
        if self.is_optimized_out():
            sys.stdout.write('  (frame information optimized out)\n')
            return
        visited = set()
        sys.stdout.write('  File "%s", line %i, in %s\n'
                  % (self.co_filename.proxyval(visited),
                     self.current_line_num(),
                     self.co_name.proxyval(visited)))
979
class PySetObjectPtr(PyObjectPtr):
    _typename = 'PySetObject'

    @classmethod
    def _dummy_key(cls):
        # Value of the global symbol _PySet_Dummy in the inferior; __iter__
        # skips table entries whose key equals it.
        return gdb.lookup_global_symbol('_PySet_Dummy').value()

    def __iter__(self):
        '''Yield a PyObjectPtr for every live key in the set's table'''
        dummy = self._dummy_key()
        entries = self.field('table')
        for slot in safe_range(self.field('mask') + 1):
            key = entries[slot]['key']
            # Skip NULL keys and keys equal to the dummy marker:
            if key == 0 or key == dummy:
                continue
            yield PyObjectPtr.from_pyobject_ptr(key)

    def proxyval(self, visited):
        '''
        Build a set (or a frozenset, when the type name is 'frozenset')
        holding the proxy values of the members.
        '''
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            return ProxyAlreadyVisited('%s(...)' % self.safe_tp_name())
        visited.add(address)

        if self.safe_tp_name() == 'frozenset':
            container = frozenset
        else:
            container = set
        return container(member.proxyval(visited) for member in self)

    def write_repr(self, out, visited):
        '''Write a Python 3 style repr of the set to "out"'''
        # Emulate Python 3's set_repr
        tp_name = self.safe_tp_name()

        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            out.write('(...)')
            return
        visited.add(address)

        # Python 3's set_repr special-cases the empty set:
        if not self.field('used'):
            out.write(tp_name)
            out.write('()')
            return

        # Python 3 uses {} for set literals; any other type name wraps the
        # braces in a "name(...)" call:
        needs_wrapper = tp_name != 'set'
        if needs_wrapper:
            out.write(tp_name)
            out.write('(')

        out.write('{')
        for position, member in enumerate(self):
            if position:
                out.write(', ')
            member.write_repr(out, visited)
        out.write('}')

        if needs_wrapper:
            out.write(')')
1040
1041
class PyBytesObjectPtr(PyObjectPtr):
    _typename = 'PyBytesObject'

    def __str__(self):
        '''Return the bytes payload as a str, one character per byte'''
        length = self.field('ob_size')
        sval = self.field('ob_sval')
        data = sval.address.cast(_type_unsigned_char_ptr())
        chars = []
        for offset in safe_range(length):
            chars.append(chr(data[offset]))
        return ''.join(chars)

    def proxyval(self, visited):
        return str(self)

    def write_repr(self, out, visited):
        '''Write the value to "out" as a Python 3 bytes literal (b'...')'''
        # Get a PyStringObject* within the Python 2 gdb process:
        proxy = self.proxyval(visited)

        # Transliteration of Python 3's Objects/bytesobject.c:PyBytes_Repr
        # to Python 2 code.
        # Prefer single quotes; switch to double quotes only when that
        # avoids escaping.
        if "'" in proxy and '"' not in proxy:
            quote = '"'
        else:
            quote = "'"

        whitespace_escapes = {'\t': '\\t', '\n': '\\n', '\r': '\\r'}

        out.write('b')
        out.write(quote)
        for byte in proxy:
            if byte == quote or byte == '\\':
                out.write('\\')
                out.write(byte)
            elif byte in whitespace_escapes:
                out.write(whitespace_escapes[byte])
            elif byte < ' ' or ord(byte) >= 0x7f:
                # Remaining control characters and everything from 0x7f
                # upwards become \xhh
                code = ord(byte)
                out.write('\\x')
                out.write(hexdigits[(code & 0xf0) >> 4])
                out.write(hexdigits[code & 0xf])
            else:
                out.write(byte)
        out.write(quote)
1084
class PyTupleObjectPtr(PyObjectPtr):
    _typename = 'PyTupleObject'

    def __getitem__(self, i):
        # Get the gdb.Value for the (PyObject*) with the given index:
        return self.field('ob_item')[i]

    def proxyval(self, visited):
        '''Build a tuple holding the proxy value of every element'''
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            return ProxyAlreadyVisited('(...)')
        visited.add(address)

        size = int_from_int(self.field('ob_size'))
        items = []
        for index in safe_range(size):
            element = PyObjectPtr.from_pyobject_ptr(self[index])
            items.append(element.proxyval(visited))
        return tuple(items)

    def write_repr(self, out, visited):
        '''Write the tuple's repr to "out", e.g. (1, 2) or (1,)'''
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            out.write('(...)')
            return
        visited.add(address)

        out.write('(')
        for index in safe_range(int_from_int(self.field('ob_size'))):
            if index > 0:
                out.write(', ')
            PyObjectPtr.from_pyobject_ptr(self[index]).write_repr(out, visited)
        # A one-element tuple needs its trailing comma:
        out.write(',)' if self.field('ob_size') == 1 else ')')
1120
class PyTypeObjectPtr(PyObjectPtr):
    # Wrapper for a gdb.Value that is a (PyTypeObject*).  It only overrides
    # the C struct name; everything else is inherited from PyObjectPtr.
    _typename = 'PyTypeObject'
1123
1124
1125def _unichr_is_printable(char):
1126    # Logic adapted from Python 3's Tools/unicode/makeunicodedata.py
1127    if char == u" ":
1128        return True
1129    import unicodedata
1130    return unicodedata.category(char) not in ("C", "Z")
1131
if sys.maxunicode < 0x10000:
    # Needed for proper surrogate support if sizeof(Py_UNICODE) is 2 in gdb
    def _unichr(x):
        # Code points that fit in 16 bits map to a single character;
        # anything above is returned as a high/low surrogate pair.
        if x < 0x10000:
            return unichr(x)
        offset = x - 0x10000
        high = 0xD800 | (offset >> 10)
        low = 0xDC00 | (offset & 0x3FF)
        return unichr(high) + unichr(low)
else:
    # unichr() in gdb's own Python can represent every code point directly
    _unichr = unichr
1143
1144
class PyUnicodeObjectPtr(PyObjectPtr):
    _typename = 'PyUnicodeObject'

    def char_width(self):
        '''Return sizeof(Py_UNICODE) as reported by gdb'''
        _type_Py_UNICODE = gdb.lookup_type('Py_UNICODE')
        return _type_Py_UNICODE.sizeof

    def proxyval(self, visited):
        '''
        Build a unicode string within the gdb process holding the code points
        of this string in the inferior process.

        Code points above 0x10ffff are replaced by U+FFFD.
        '''
        global _is_pep393
        if _is_pep393 is None:
            # Decide once which struct layout is in use: only the newer
            # layout has a "data" field.
            fields = gdb.lookup_type('PyUnicodeObject').target().fields()
            _is_pep393 = 'data' in [f.name for f in fields]
        if _is_pep393:
            # Python 3.3 and newer
            may_have_surrogates = False
            compact = self.field('_base')
            ascii_base = compact['_base']
            state = ascii_base['state']
            is_compact_ascii = (int(state['ascii']) and int(state['compact']))
            if not int(state['ready']):
                # string is not ready
                field_length = long(compact['wstr_length'])
                may_have_surrogates = True
                field_str = ascii_base['wstr']
            else:
                field_length = long(ascii_base['length'])
                if is_compact_ascii:
                    # The character data directly follows the struct
                    field_str = ascii_base.address + 1
                elif int(state['compact']):
                    field_str = compact.address + 1
                else:
                    field_str = self.field('data')['any']
                repr_kind = int(state['kind'])
                if repr_kind == 1:
                    field_str = field_str.cast(_type_unsigned_char_ptr())
                elif repr_kind == 2:
                    field_str = field_str.cast(_type_unsigned_short_ptr())
                elif repr_kind == 4:
                    field_str = field_str.cast(_type_unsigned_int_ptr())
        else:
            # Python 3.2 and earlier
            field_length = long(self.field('length'))
            field_str = self.field('str')
            may_have_surrogates = self.char_width() == 2

        # Gather a list of ints from the Py_UNICODE array; these are either
        # UCS-1, UCS-2 or UCS-4 code points:
        if not may_have_surrogates:
            Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)]
        else:
            # A more elaborate routine if sizeof(Py_UNICODE) is 2 in the
            # inferior process: we must join surrogate pairs.
            Py_UNICODEs = []
            i = 0
            limit = safety_limit(field_length)
            while i < limit:
                ucs = int(field_str[i])
                i += 1
                if ucs < 0xD800 or ucs >= 0xDC00 or i == field_length:
                    Py_UNICODEs.append(ucs)
                    continue
                # This could be a surrogate pair.
                ucs2 = int(field_str[i])
                if ucs2 < 0xDC00 or ucs2 > 0xDFFF:
                    # Lone high surrogate: keep it (as is done for one at
                    # the very end of the string) rather than dropping it;
                    # the following unit is examined on the next iteration.
                    Py_UNICODEs.append(ucs)
                    continue
                code = (ucs & 0x03FF) << 10
                code |= ucs2 & 0x03FF
                code += 0x00010000
                Py_UNICODEs.append(code)
                i += 1

        # Convert the int code points to unicode characters, and generate a
        # local unicode instance.
        # This splits surrogate pairs if sizeof(Py_UNICODE) is 2 here (in gdb).
        # The replacement character is spelled with a u prefix so that it is
        # U+FFFD under Python 2 as well, not the six characters "\ufffd".
        result = u''.join([
            (_unichr(ucs) if ucs <= 0x10ffff else u'\ufffd')
            for ucs in Py_UNICODEs])
        return result

    def write_repr(self, out, visited):
        '''
        Write this string to "out" as a Python 3 str literal, i.e. without a
        "u" prefix, escaping quotes, backslashes and non-printable characters.
        '''
        def write_hex(code, ndigits):
            # Write the low "ndigits" hex digits of "code", most significant
            # first, one write() call per digit.
            for shift in range(4 * (ndigits - 1), -1, -4):
                out.write(hexdigits[(code >> shift) & 0xF])

        # Get a PyUnicodeObject* within the Python 2 gdb process:
        proxy = self.proxyval(visited)

        # Transliteration of Python 3's Object/unicodeobject.c:unicode_repr
        # to Python 2:
        if "'" in proxy and '"' not in proxy:
            quote = '"'
        else:
            quote = "'"
        out.write(quote)

        i = 0
        while i < len(proxy):
            ch = proxy[i]
            i += 1

            # Escape quotes and backslashes
            if ch == quote or ch == '\\':
                out.write('\\')
                out.write(ch)

            # Map special whitespace to '\t', '\n', '\r'
            elif ch == '\t':
                out.write('\\t')
            elif ch == '\n':
                out.write('\\n')
            elif ch == '\r':
                out.write('\\r')

            # Map non-printable US ASCII to '\xhh'.  The DEL test has to
            # compare the ordinal: a character never equals the int 0x7F.
            elif ch < ' ' or ord(ch) == 0x7F:
                out.write('\\x')
                write_hex(ord(ch), 2)

            # Copy ASCII characters as-is
            elif ord(ch) < 0x7F:
                out.write(ch)

            # Non-ASCII characters
            else:
                ucs = ch
                ch2 = None
                if sys.maxunicode < 0x10000:
                    # If sizeof(Py_UNICODE) is 2 here (in gdb), join
                    # surrogate pairs before calling _unichr_is_printable.
                    if (i < len(proxy)
                            and 0xD800 <= ord(ch) < 0xDC00
                            and 0xDC00 <= ord(proxy[i]) <= 0xDFFF):
                        ch2 = proxy[i]
                        ucs = ch + ch2
                        i += 1

                # Unfortunately, Python 2's unicode type doesn't seem
                # to expose the "isprintable" method
                printable = _unichr_is_printable(ucs)
                if printable:
                    # Characters that cannot be encoded with ENCODING are
                    # escaped as well
                    try:
                        ucs.encode(ENCODING)
                    except UnicodeEncodeError:
                        printable = False

                # Map Unicode whitespace and control characters
                # (categories Z* and C* except ASCII space)
                if not printable:
                    if ch2 is not None:
                        # Match Python 3's representation of non-printable
                        # wide characters.
                        code = (ord(ch) & 0x03FF) << 10
                        code |= ord(ch2) & 0x03FF
                        code += 0x00010000
                    else:
                        code = ord(ucs)

                    # Map 8-bit characters to '\\xhh'
                    if code <= 0xff:
                        out.write('\\x')
                        write_hex(code, 2)
                    # Map 21-bit characters to '\U00xxxxxx'
                    elif code >= 0x10000:
                        out.write('\\U')
                        write_hex(code, 8)
                    # Map 16-bit characters to '\uxxxx'
                    else:
                        out.write('\\u')
                        write_hex(code, 4)
                else:
                    # Copy characters as-is
                    out.write(ch)
                    if ch2 is not None:
                        out.write(ch2)

        out.write(quote)
1331
1332
1333
1334
def int_from_int(gdbval):
    '''Convert "gdbval" to a Python int by parsing its str() form'''
    text = str(gdbval)
    return int(text)
1337
1338
def stringify(val, pretty=False):
    '''
    Return a string representation of "val".

    By default this is repr(val), which puts everything on one line.  With
    pretty=True, pprint.pformat is used instead; that can be nicer to read
    but can lead to very long results.  This function isolates the choice.
    '''
    if pretty:
        from pprint import pformat
        return pformat(val)
    return repr(val)
1347
1348
class PyObjectPtrPrinter:
    "Pretty-printer for a (PyObject*) gdb value"

    def __init__(self, gdbval):
        self.gdbval = gdbval

    def to_string(self):
        # Emit the truncated repr.  The alternative, generating the full
        # proxy value and stringifying it, could be expensive.
        wrapped = PyObjectPtr.from_pyobject_ptr(self.gdbval)
        return wrapped.get_truncated_repr(MAX_OUTPUT_LEN)
1364
def pretty_printer_lookup(gdbval):
    '''
    Return a PyObjectPtrPrinter for "gdbval" if it is a pointer to one of
    the handled struct types, otherwise None.
    '''
    val_type = gdbval.type.unqualified()
    if val_type.code != gdb.TYPE_CODE_PTR:
        return None

    # Only pointers to these (unqualified) target types are handled:
    target_name = str(val_type.target().unqualified())
    if target_name in ("PyObject", "PyFrameObject", "PyUnicodeObject"):
        return PyObjectPtrPrinter(gdbval)
    return None
1372
1373"""
1374During development, I've been manually invoking the code in this way:
1375(gdb) python
1376
1377import sys
1378sys.path.append('/home/david/coding/python-gdb')
1379import libpython
1380end
1381
1382then reloading it after each edit like this:
1383(gdb) python reload(libpython)
1384
1385The following code should ensure that the prettyprinter is registered
1386if the code is autoloaded by gdb when visiting libpython.so, provided
1387that this python file is installed to the same path as the library (or its
1388.debug file) plus a "-gdb.py" suffix, e.g:
1389  /usr/lib/libpython2.6.so.1.0-gdb.py
1390  /usr/lib/debug/usr/lib/libpython2.6.so.1.0.debug-gdb.py
1391"""
def register(obj):
    '''
    Append pretty_printer_lookup to the pretty_printers list of "obj",
    falling back to the gdb module itself when "obj" is None.
    '''
    target = gdb if obj is None else obj

    # Wire up the pretty-printer
    target.pretty_printers.append(pretty_printer_lookup)

register(gdb.current_objfile())
1400
1401
1402
1403# Unfortunately, the exact API exposed by the gdb module varies somewhat
1404# from build to build
1405# See http://bugs.python.org/issue8279?#msg102276
1406
class Frame(object):
    '''
    Wrapper for gdb.Frame, adding various methods
    '''
    def __init__(self, gdbframe):
        self._gdbframe = gdbframe

    def older(self):
        '''Return a Frame wrapping the next older gdb frame, or None'''
        older = self._gdbframe.older()
        if older:
            return Frame(older)
        else:
            return None

    def newer(self):
        '''Return a Frame wrapping the next newer gdb frame, or None'''
        newer = self._gdbframe.newer()
        if newer:
            return Frame(newer)
        else:
            return None

    def select(self):
        '''If supported, select this frame and return True; return False if unsupported

        Not all builds have a gdb.Frame.select method; seems to be present on Fedora 12
        onwards, but absent on Ubuntu buildbot'''
        if not hasattr(self._gdbframe, 'select'):
            print ('Unable to select frame: '
                   'this build of gdb does not expose a gdb.Frame.select method')
            return False
        self._gdbframe.select()
        return True

    def get_index(self):
        '''Calculate index of frame, starting at 0 for the newest frame within
        this thread'''
        index = 0
        # Go down until you reach the newest frame, asking gdb for each
        # newer frame only once:
        newer = self.newer()
        while newer:
            index += 1
            newer = newer.newer()
        return index

    # We divide frames into:
    #   - "python frames":
    #       - "bytecode frames" i.e. PyEval_EvalFrameEx
    #       - "other python frames": things that are of interest from a python
    #         POV, but aren't bytecode (e.g. GC, GIL)
    #   - everything else

    def is_python_frame(self):
        '''Is this a PyEval_EvalFrameEx frame, or some other important
        frame? (see is_other_python_frame for what "important" means in this
        context)'''
        if self.is_evalframeex():
            return True
        if self.is_other_python_frame():
            return True
        return False

    def is_evalframeex(self):
        '''Is this a PyEval_EvalFrameEx frame?'''
        if self._gdbframe.name() == 'PyEval_EvalFrameEx':
            # I believe we also need to filter on the inline
            # struct frame_id.inline_depth, only regarding frames with
            # an inline depth of 0 as actually being this function
            #
            # So we reject those with type gdb.INLINE_FRAME
            if self._gdbframe.type() == gdb.NORMAL_FRAME:
                # We have a PyEval_EvalFrameEx frame:
                return True

        return False

    def is_other_python_frame(self):
        '''Is this frame worth displaying in python backtraces?
        Examples:
          - waiting on the GIL
          - garbage-collecting
          - within a CFunction
         If it is, return a descriptive string
         For other frames, return False
         '''
        if self.is_waiting_for_gil():
            return 'Waiting for the GIL'

        if self.is_gc_collect():
            return 'Garbage-collecting'

        # Detect invocations of PyCFunction instances:
        older = self.older()
        if not older:
            return False

        caller = older._gdbframe.name()
        if not caller:
            return False

        if caller == 'PyCFunction_Call':
            # Within that frame:
            #   "func" is the local containing the PyObject* of the
            # PyCFunctionObject instance
            #   "f" is the same value, but cast to (PyCFunctionObject*)
            #   "self" is the (PyObject*) of the 'self'
            try:
                # Use the prettyprinter for the func:
                func = older._gdbframe.read_var('func')
                return str(func)
            except RuntimeError:
                return 'PyCFunction invocation (unable to read "func")'

        elif caller == '_PyCFunction_FastCallDict':
            try:
                func = older._gdbframe.read_var('func_obj')
                return str(func)
            except RuntimeError:
                return 'PyCFunction invocation (unable to read "func_obj")'

        # This frame isn't worth reporting:
        return False

    def is_waiting_for_gil(self):
        '''Is this frame waiting on the GIL?

        Always returns a bool; a frame without a name is not waiting.'''
        # This assumes the _POSIX_THREADS version of Python/ceval_gil.h:
        name = self._gdbframe.name()
        return bool(name) and 'pthread_cond_timedwait' in name

    def is_gc_collect(self):
        '''Is this frame "collect" within the garbage-collector?'''
        return self._gdbframe.name() == 'collect'

    def get_pyop(self):
        '''Return the PyFrameObjectPtr for the "f" variable of this frame

        If "f" is optimized out here, the "f" of the calling gdb frame is
        tried instead; if that is not usable either, the optimized-out
        wrapper is returned.  Returns None if gdb raises ValueError while
        reading the variable.
        '''
        try:
            f = self._gdbframe.read_var('f')
            frame = PyFrameObjectPtr.from_pyobject_ptr(f)
            if not frame.is_optimized_out():
                return frame
            # gdb is unable to get the "f" argument of PyEval_EvalFrameEx()
            # because it was "optimized out". Try to get "f" from the frame
            # of the caller, PyEval_EvalCodeEx().
            orig_frame = frame
            caller = self._gdbframe.older()
            if caller:
                f = caller.read_var('f')
                frame = PyFrameObjectPtr.from_pyobject_ptr(f)
                if not frame.is_optimized_out():
                    return frame
            return orig_frame
        except ValueError:
            return None

    @classmethod
    def get_selected_frame(cls):
        '''Return a Frame wrapping gdb's selected frame, or None'''
        _gdbframe = gdb.selected_frame()
        if _gdbframe:
            return Frame(_gdbframe)
        return None

    @classmethod
    def get_selected_python_frame(cls):
        '''Try to obtain the Frame for the python-related code in the selected
        frame, or None'''
        try:
            frame = cls.get_selected_frame()
        except gdb.error:
            # No frame: Python didn't start yet
            return None

        while frame:
            if frame.is_python_frame():
                return frame
            frame = frame.older()

        # Not found:
        return None

    @classmethod
    def get_selected_bytecode_frame(cls):
        '''Try to obtain the Frame for the python bytecode interpreter in the
        selected GDB frame, or None'''
        try:
            frame = cls.get_selected_frame()
        except gdb.error:
            # No frame: Python didn't start yet.  Report "not found", in the
            # same way as get_selected_python_frame, instead of letting the
            # error escape to the caller.
            return None

        while frame:
            if frame.is_evalframeex():
                return frame
            frame = frame.older()

        # Not found:
        return None

    def print_summary(self):
        '''Write a "#INDEX ..." summary of this frame to sys.stdout,
        followed by the current source line when it can be read'''
        if self.is_evalframeex():
            pyop = self.get_pyop()
            if pyop:
                line = pyop.get_truncated_repr(MAX_OUTPUT_LEN)
                write_unicode(sys.stdout, '#%i %s\n' % (self.get_index(), line))
                if not pyop.is_optimized_out():
                    line = pyop.current_line()
                    if line is not None:
                        sys.stdout.write('    %s\n' % line.strip())
            else:
                sys.stdout.write('#%i (unable to read python frame information)\n' % self.get_index())
        else:
            info = self.is_other_python_frame()
            if info:
                sys.stdout.write('#%i %s\n' % (self.get_index(), info))
            else:
                sys.stdout.write('#%i\n' % self.get_index())

    def print_traceback(self):
        '''Write a traceback-style entry for this frame to sys.stdout,
        followed by the current source line when it can be read'''
        if self.is_evalframeex():
            pyop = self.get_pyop()
            if pyop:
                pyop.print_traceback()
                if not pyop.is_optimized_out():
                    line = pyop.current_line()
                    if line is not None:
                        sys.stdout.write('    %s\n' % line.strip())
            else:
                sys.stdout.write('  (unable to read python frame information)\n')
        else:
            info = self.is_other_python_frame()
            if info:
                sys.stdout.write('  %s\n' % info)
            else:
                sys.stdout.write('  (not a python frame)\n')
1637
class PyList(gdb.Command):
    '''List the current Python source code, if any

    Use
       py-list START
    to list at a different line number within the python source.

    Use
       py-list START, END
    to list a specific range of lines within the python source.
    '''

    def __init__(self):
        super(PyList, self).__init__("py-list",
                                     gdb.COMMAND_FILES,
                                     gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        import re

        start = None
        end = None

        # "START, END" takes precedence; a lone "START" lists the ten lines
        # that follow it.
        range_match = re.match(r'\s*(\d+)\s*,\s*(\d+)\s*', args)
        if range_match:
            start, end = map(int, range_match.groups())
        else:
            single_match = re.match(r'\s*(\d+)\s*', args)
            if single_match:
                start = int(single_match.group(1))
                end = start + 10

        # py-list requires an actual PyEval_EvalFrameEx frame:
        frame = Frame.get_selected_bytecode_frame()
        if not frame:
            print('Unable to locate gdb frame for python bytecode interpreter')
            return

        pyop = frame.get_pyop()
        if not pyop or pyop.is_optimized_out():
            print('Unable to read information on python frame')
            return

        filename = pyop.filename()
        lineno = pyop.current_line_num()

        # Without arguments, show five lines either side of the current one:
        if start is None:
            start = lineno - 5
            end = lineno + 5

        start = max(start, 1)

        try:
            f = open(os_fsencode(filename), 'r')
        except IOError as err:
            sys.stdout.write('Unable to open %s: %s\n'
                             % (filename, err))
            return
        with f:
            all_lines = f.readlines()
            # start and end are 1-based, all_lines is 0-based;
            # so [start-1:end] as a python slice gives us [start, end] as a
            # closed interval
            for number, text in enumerate(all_lines[start - 1:end], start):
                label = str(number)
                # Highlight current line:
                if number == lineno:
                    label = '>' + label
                sys.stdout.write('%4s    %s' % (label, text))


# ...and register the command:
PyList()
1714
def move_in_stack(move_up):
    '''Move up or down the stack (for the py-up/py-down command)'''
    current = Frame.get_selected_python_frame()
    if not current:
        print('Unable to locate python frame')
        return

    # Walk away from the selected python frame, one gdb frame at a time,
    # until another python frame turns up or the stack runs out.
    candidate = current.older() if move_up else current.newer()
    while candidate:
        if candidate.is_python_frame():
            # Result:
            if candidate.select():
                candidate.print_summary()
            return
        candidate = candidate.older() if move_up else candidate.newer()

    if move_up:
        message = 'Unable to find an older python frame'
    else:
        message = 'Unable to find a newer python frame'
    print(message)
1743
class PyUp(gdb.Command):
    'Select and print the python stack frame that called this one (if any)'

    def __init__(self):
        '''Set the command up under the name "py-up".'''
        super(PyUp, self).__init__("py-up",
                                   gdb.COMMAND_STACK,
                                   gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        '''Run the command; args and from_tty are not used.'''
        # Moving "up" means moving towards older frames (the callers)
        move_in_stack(move_up=True)
1755
class PyDown(gdb.Command):
    'Select and print the python stack frame called by this one (if any)'

    def __init__(self):
        '''Set the command up under the name "py-down".'''
        super(PyDown, self).__init__("py-down",
                                     gdb.COMMAND_STACK,
                                     gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        '''Run the command; args and from_tty are not used.'''
        # Moving "down" means moving towards newer frames (the callees)
        move_in_stack(move_up=False)
1767
# Not all builds of gdb have gdb.Frame.select, so py-up and py-down are
# only registered when it is present.
# NOTE(review): presumably the select() call made by move_in_stack() ends
# up in gdb.Frame.select -- confirm against the Frame wrapper class.
if hasattr(gdb.Frame, 'select'):
    PyUp()
    PyDown()
1772
class PyBacktraceFull(gdb.Command):
    'Display the current python frame and all the frames within its call stack (if any)'

    def __init__(self):
        '''Set the command up under the name "py-bt-full".'''
        super(PyBacktraceFull, self).__init__("py-bt-full",
                                              gdb.COMMAND_STACK,
                                              gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        '''Print a summary of each python frame, innermost first.

        args and from_tty are not used.
        '''
        current = Frame.get_selected_python_frame()
        if not current:
            print('Unable to locate python frame')
            return

        # Walk outwards through the callers, skipping frames that are not
        # python frames:
        while current:
            if current.is_python_frame():
                current.print_summary()
            current = current.older()


# ...and register the command:
PyBacktraceFull()
1794
class PyBacktrace(gdb.Command):
    'Display the current python frame and all the frames within its call stack (if any)'

    def __init__(self):
        '''Set the command up under the name "py-bt".'''
        super(PyBacktrace, self).__init__("py-bt",
                                          gdb.COMMAND_STACK,
                                          gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        '''Print a traceback entry for each python frame, innermost first.

        args and from_tty are not used.
        '''
        current = Frame.get_selected_python_frame()
        if not current:
            print('Unable to locate python frame')
            return

        # The header is only written once a starting frame has been found
        sys.stdout.write('Traceback (most recent call first):\n')

        # Walk outwards through the callers, skipping frames that are not
        # python frames:
        while current:
            if current.is_python_frame():
                current.print_traceback()
            current = current.older()


# ...and register the command:
PyBacktrace()
1817
class PyPrint(gdb.Command):
    'Look up the given python variable name, and print it'

    def __init__(self):
        '''Set the command up under the name "py-print".'''
        super(PyPrint, self).__init__("py-print",
                                      gdb.COMMAND_DATA,
                                      gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        '''Print the variable named by args, as seen from the selected frame.

        args: the whole argument string, used verbatim as the variable name.
        from_tty: not used.

        Output is either "<scope> '<name>' = <repr>", with the repr
        truncated to MAX_OUTPUT_LEN, or a message saying why nothing could
        be shown.
        '''
        frame = Frame.get_selected_python_frame()
        if not frame:
            print('Unable to locate python frame')
            return

        pyop_frame = frame.get_pyop()
        if not pyop_frame:
            print('Unable to read information on python frame')
            return

        name = str(args)
        pyop_var, scope = pyop_frame.get_var_by_name(name)
        if not pyop_var:
            print('%r not found' % name)
            return

        value_repr = pyop_var.get_truncated_repr(MAX_OUTPUT_LEN)
        print('%s %r = %s' % (scope, name, value_repr))


# ...and register the command:
PyPrint()
1851
class PyLocals(gdb.Command):
    'Print the local variables of the selected python frame'
    # The docstring above doubles as the help text for the command.  It used
    # to be a copy of py-print's ("Look up the given python variable
    # name..."), which was wrong: this command takes no variable name.

    def __init__(self):
        '''Set the command up under the name "py-locals".'''
        gdb.Command.__init__(self,
                             "py-locals",
                             gdb.COMMAND_DATA,
                             gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        '''Print every local of the selected python frame as "name = repr".

        args and from_tty are not used: the command takes no arguments.
        Each repr is truncated to MAX_OUTPUT_LEN.  If no python frame can
        be located, or its frame object cannot be read, a message is
        printed instead.
        '''
        frame = Frame.get_selected_python_frame()
        if not frame:
            print('Unable to locate python frame')
            return

        pyop_frame = frame.get_pyop()
        if not pyop_frame:
            print('Unable to read information on python frame')
            return

        for pyop_name, pyop_value in pyop_frame.iter_locals():
            # A fresh "visited" set is used for each name's proxy value
            print('%s = %s'
                  % (pyop_name.proxyval(set()),
                     pyop_value.get_truncated_repr(MAX_OUTPUT_LEN)))


# ...and register the command:
PyLocals()
1880