• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/python
2'''
3From gdb 7 onwards, gdb's build can be configured --with-python, allowing gdb
4to be extended with Python code e.g. for library-specific data visualizations,
5such as for the C++ STL types.  Documentation on this API can be seen at:
6http://sourceware.org/gdb/current/onlinedocs/gdb/Python-API.html
7
8
9This python module deals with the case when the process being debugged (the
10"inferior process" in gdb parlance) is itself python, or more specifically,
11linked against libpython.  In this situation, almost every item of data is a
12(PyObject*), and having the debugger merely print their addresses is not very
13enlightening.
14
15This module embeds knowledge about the implementation details of libpython so
16that we can emit useful visualizations e.g. a string, a list, a dict, a frame
17giving file/line information and the state of local variables
18
19In particular, given a gdb.Value corresponding to a PyObject* in the inferior
20process, we can generate a "proxy value" within the gdb process.  For example,
21given a PyObject* in the inferior process that is in fact a PyListObject*
22holding three PyObject* that turn out to be PyBytesObject* instances, we can
23generate a proxy value within the gdb process that is a list of bytes
24instances:
25  [b"foo", b"bar", b"baz"]
26
27Doing so can be expensive for complicated graphs of objects, and could take
28some time, so we also have a "write_repr" method that writes a representation
29of the data to a file-like object.  This allows us to stop the traversal by
30having the file-like object raise an exception if it gets too much data.
31
32With both "proxyval" and "write_repr" we keep track of the set of all addresses
33visited so far in the traversal, to avoid infinite recursion due to cycles in
34the graph of object references.
35
36We try to defer gdb.lookup_type() invocations for python types until as late as
37possible: for a dynamically linked python binary, when the process starts in
38the debugger, the libpython.so hasn't been dynamically loaded yet, so none of
39the type names are known to the debugger
40
41The module also extends gdb with some python-specific commands.
42'''
43
44# NOTE: some gdbs are linked with Python 3, so this file should be dual-syntax
45# compatible (2.6+ and 3.0+).  See #19308.
46
47from __future__ import print_function
48import gdb
49import os
50import locale
51import sys
52
if sys.version_info[0] >= 3:
    # Python 3 no longer provides these Python 2 builtins; bind each name to
    # its Python 3 counterpart so the rest of the module can use one spelling.
    unichr, xrange, long = chr, range, int
57
58# Look up the gdb.Type for some standard types:
59# Those need to be refreshed as types (pointer sizes) may change when
60# gdb loads different executables
61
def _type_char_ptr():
    """Return the gdb.Type for ``char*``, looked up afresh on every call."""
    char_type = gdb.lookup_type('char')
    return char_type.pointer()
64
65
def _type_unsigned_char_ptr():
    """Return the gdb.Type for ``unsigned char*``, looked up on every call."""
    uchar_type = gdb.lookup_type('unsigned char')
    return uchar_type.pointer()
68
69
def _type_unsigned_short_ptr():
    """Return the gdb.Type for ``unsigned short*``, looked up on every call."""
    ushort_type = gdb.lookup_type('unsigned short')
    return ushort_type.pointer()
72
73
def _type_unsigned_int_ptr():
    """Return the gdb.Type for ``unsigned int*``, looked up on every call."""
    uint_type = gdb.lookup_type('unsigned int')
    return uint_type.pointer()
76
77
def _sizeof_void_p():
    """Return the size of ``void*`` as reported by gdb for the current types."""
    void_ptr_type = gdb.lookup_type('void').pointer()
    return void_ptr_type.sizeof
80
81
# value computed later, see PyUnicodeObjectPtr.proxy()
_is_pep393 = None

# Bit masks that are tested against a type object's "tp_flags" field when
# choosing which PyObjectPtr subclass should wrap a value.
Py_TPFLAGS_HEAPTYPE = 1 << 9
Py_TPFLAGS_LONG_SUBCLASS = 1 << 24
Py_TPFLAGS_LIST_SUBCLASS = 1 << 25
Py_TPFLAGS_TUPLE_SUBCLASS = 1 << 26
Py_TPFLAGS_BYTES_SUBCLASS = 1 << 27
Py_TPFLAGS_UNICODE_SUBCLASS = 1 << 28
Py_TPFLAGS_DICT_SUBCLASS = 1 << 29
Py_TPFLAGS_BASE_EXC_SUBCLASS = 1 << 30
Py_TPFLAGS_TYPE_SUBCLASS = 1 << 31


MAX_OUTPUT_LEN = 1024

hexdigits = "0123456789abcdef"

# Encoding used by write_unicode() when it has to turn unicode into bytes.
ENCODING = locale.getpreferredencoding()

FRAME_INFO_OPTIMIZED_OUT = '(frame information optimized out)'
UNABLE_READ_INFO_PYTHON_FRAME = 'Unable to read information on python frame'
EVALFRAME = '_PyEval_EvalFrameDefault'
105
class NullPyObjectPtr(RuntimeError):
    """Raised when a field is requested from a NULL PyObject pointer."""
108
109
def safety_limit(val):
    """Clamp *val* to at most 1000.

    Values read from the process being debugged may be garbage, so sizes and
    iteration counts taken from it are capped to keep gdb itself responsive.
    """
    ceiling = 1000
    if ceiling < val:
        return ceiling
    return val
115
116
def safe_range(val):
    """Like range(val), but with the upper bound capped by safety_limit().

    The bound usually comes from the inferior process and may be corrupt, so
    it is converted to an int and clamped before the range is built.
    """
    capped = safety_limit(int(val))
    return xrange(capped)
121
if sys.version_info[0] < 3:
    def write_unicode(file, text):
        """Write a byte or unicode string to *file*.

        Unicode strings are first encoded to ENCODING using the
        'backslashreplace' error handler, which avoids UnicodeEncodeError.
        """
        if isinstance(text, unicode):
            text = text.encode(ENCODING, 'backslashreplace')
        file.write(text)
else:
    def write_unicode(file, text):
        """Write *text* to *file* unchanged."""
        file.write(text)
133
if hasattr(os, 'fsencode'):
    os_fsencode = os.fsencode
else:
    def os_fsencode(filename):
        """Fallback for interpreters whose os module has no fsencode().

        Non-unicode input is returned untouched.  Unicode input is encoded
        with the filesystem encoding, emulating the 'surrogateescape' error
        handler by hand (except for 'mbcs', which does not support it).
        """
        if not isinstance(filename, unicode):
            return filename
        fs_encoding = sys.getfilesystemencoding()
        if fs_encoding == 'mbcs':
            # mbcs doesn't support surrogateescape
            return filename.encode(fs_encoding)
        pieces = []
        for ch in filename:
            codepoint = ord(ch)
            # surrogateescape error handler
            if 0xDC80 <= codepoint <= 0xDCFF:
                pieces.append(chr(codepoint - 0xDC00))
            else:
                pieces.append(ch.encode(fs_encoding))
        return ''.join(pieces)
153
class StringTruncated(RuntimeError):
    """Raised by TruncatedStringIO.write() once the length limit is exceeded."""
156
class TruncatedStringIO(object):
    '''Minimal io.StringIO look-alike with an optional length cap.

    When "maxlen" is truthy and a write would push the buffer past it, the
    part that still fits is kept and StringTruncated is raised.'''
    def __init__(self, maxlen=None):
        self._val = ''
        self.maxlen = maxlen

    def write(self, data):
        '''Append "data"; raise StringTruncated if it does not fit entirely.'''
        limit = self.maxlen
        if limit and len(data) + len(self._val) > limit:
            # Truncation: keep only what still fits, then signal the caller.
            room = limit - len(self._val)
            self._val += data[0:room]
            raise StringTruncated()

        self._val += data

    def getvalue(self):
        '''Return everything accumulated so far.'''
        return self._val
175
class PyObjectPtr(object):
    """
    Wrapper around a gdb.Value holding a (PyObject*) from the inferior
    process, or a pointer to a more specific struct such as (PyBytesObject*).

    Each refined PyObject type that gets special treatment has its own
    subclass.

    Nothing here may trust the pointer: it can be NULL or refer to corrupt
    memory at any point, since this code runs inside a debugger.
    """
    _typename = 'PyObject'

    def __init__(self, gdbval, cast_to=None):
        # A truthy cast_to re-types the pointer; otherwise keep it untouched.
        self._gdbval = gdbval.cast(cast_to) if cast_to else gdbval

    def field(self, name):
        '''
        Return the gdb.Value of the struct field "name" of this object,
        smoothing over some python 2 versus python 3 layout differences.

        Various libpython types are declared with the "PyObject_HEAD" and
        "PyObject_VAR_HEAD" macros.

        In Python 2, these expand so that "ob_type" and (for a var object)
        "ob_size" are direct fields of the type in question.

        In Python 3, they expand to an embedded PyVarObject thus:
           PyVarObject ob_base;
        so "ob_size" lives inside the "ob_base" field, and "ob_type" is most
        easily reached by casting back to a (PyObject*).

        Raises NullPyObjectPtr if the wrapped pointer is NULL.
        '''
        if self.is_null():
            raise NullPyObjectPtr(self)

        # The two header fields are read through the matching header struct;
        # every other field is read from the object's own struct.
        if name == 'ob_type':
            header_type = PyObjectPtr.get_gdb_type()
        elif name == 'ob_size':
            header_type = PyVarObjectPtr.get_gdb_type()
        else:
            # General case: look it up inside the object:
            return self._gdbval.dereference()[name]

        return self._gdbval.cast(header_type).dereference()[name]

    def pyop_field(self, name):
        '''
        Like field(), but wrap the (PyObject*) found there in the most
        specific PyObjectPtr subclass available.
        '''
        raw_field = self.field(name)
        return PyObjectPtr.from_pyobject_ptr(raw_field)

    def write_field_repr(self, name, out, visited):
        '''
        Write the representation of the PyObject* field called "name" to the
        file-like object "out".
        '''
        self.pyop_field(name).write_repr(out, visited)

    def get_truncated_repr(self, maxlen):
        '''
        Return a repr-like string for the data, cut off at "maxlen" bytes;
        the object graph traversal stops as soon as the limit is hit.
        '''
        out = TruncatedStringIO(maxlen)
        try:
            self.write_repr(out, set())
        except StringTruncated:
            # Truncation occurred:
            suffix = '...(truncated)'
        else:
            # No truncation occurred:
            suffix = ''
        return out.getvalue() + suffix

    def type(self):
        '''Return a PyTypeObjectPtr wrapping this object's "ob_type".'''
        ob_type = self.field('ob_type')
        return PyTypeObjectPtr(ob_type)

    def is_null(self):
        '''Return True if the wrapped pointer is NULL.'''
        return long(self._gdbval) == 0

    def is_optimized_out(self):
        '''
        Report whether the debugger cannot see the underlying PyObject*.

        This depends on the exact compiler version used to build Python and
        on the exact gdb version.

        See e.g. https://bugzilla.redhat.com/show_bug.cgi?id=556975 with
        PyEval_EvalFrameEx's "f"
        '''
        return self._gdbval.is_optimized_out

    def safe_tp_name(self):
        '''
        Return the "tp_name" of this object's type as a string, or 'unknown'
        if it cannot be read or decoded.
        '''
        try:
            return self.type().field('tp_name').string()
        # NullPyObjectPtr: NULL tp_name?
        # RuntimeError: Can't even read the object at all?
        # UnicodeDecodeError: Failed to decode tp_name bytestring
        except (NullPyObjectPtr, RuntimeError, UnicodeDecodeError):
            return 'unknown'

    def proxyval(self, visited):
        '''
        Build a value inside the gdb process that stands in for the value in
        the inferior process, whilst (hopefully) surviving corrupt remote
        data.

        Subclasses override this; e.g. a PyIntObject* whose ob_ival is 42 in
        the inferior should come back as int(42) here.

        visited: set of the pyobject pointers already seen while building
        this value, guarding against infinite recursion on object graphs
        with loops.  Analogous to Py_ReprEnter and Py_ReprLeave.
        '''

        class FakeRepr(object):
            """
            Stand-in for a PyObject* that has no custom scraper; exists only
            to provide a sane repr().
            """

            def __init__(self, tp_name, address):
                self.tp_name = tp_name
                self.address = address

            def __repr__(self):
                # A NULL pointer has no discoverable type, so it is
                # special-cased as per
                # http://bugs.python.org/issue8032#msg100882
                if self.address != 0:
                    return '<%s at remote 0x%x>' % (self.tp_name, self.address)
                return '0x0'

        tp_name = self.safe_tp_name()
        address = long(self._gdbval)
        return FakeRepr(tp_name, address)

    def write_repr(self, out, visited):
        '''
        Write a string representation of the scraped value to the file-like
        object "out".
        '''
        # Default: build the proxy and write its repr.  That can be costly
        # for complicated objects, so subclasses provide specialized versions.
        proxy = self.proxyval(visited)
        return out.write(repr(proxy))

    @classmethod
    def subclass_from_type(cls, t):
        '''
        Pick the PyObjectPtr subclass to use for objects whose type is "t",
        a PyTypeObjectPtr wrapping a (PyTypeObject*).

        Looking up the symbols of the global type objects would be nicer, but
        at the time of writing gdb answered
          (gdb) python print gdb.lookup_symbol('PyList_Type')[0].value
        with
          NotImplementedError: Symbol type not yet supported in Python scripts.

        So a few special cases are recognised by comparing tp_name strings,
        and everything else is decided from tp_flags.  "cls" itself is
        returned when nothing more specific applies or the type is unreadable.
        '''
        try:
            tp_name = t.field('tp_name').string()
            tp_flags = int(t.field('tp_flags'))
        # RuntimeError: NULL pointers
        # UnicodeDecodeError: string() fails to decode the bytestring
        except (RuntimeError, UnicodeDecodeError):
            # Any kind of error, e.g. NULL ptrs: settle for the base class
            return cls

        # Types that cannot be told apart through tp_flags:
        by_name = {'bool': PyBoolObjectPtr,
                   'classobj': PyClassObjectPtr,
                   'NoneType': PyNoneStructPtr,
                   'frame': PyFrameObjectPtr,
                   'set' : PySetObjectPtr,
                   'frozenset' : PySetObjectPtr,
                   'builtin_function_or_method' : PyCFunctionObjectPtr,
                   'method-wrapper': wrapperobject,
                   }
        special_case = by_name.get(tp_name)
        if special_case is not None:
            return special_case

        if tp_flags & Py_TPFLAGS_HEAPTYPE:
            return HeapTypeObjectPtr

        if tp_flags & Py_TPFLAGS_LONG_SUBCLASS:
            return PyLongObjectPtr
        if tp_flags & Py_TPFLAGS_LIST_SUBCLASS:
            return PyListObjectPtr
        if tp_flags & Py_TPFLAGS_TUPLE_SUBCLASS:
            return PyTupleObjectPtr
        if tp_flags & Py_TPFLAGS_BYTES_SUBCLASS:
            return PyBytesObjectPtr
        if tp_flags & Py_TPFLAGS_UNICODE_SUBCLASS:
            return PyUnicodeObjectPtr
        if tp_flags & Py_TPFLAGS_DICT_SUBCLASS:
            return PyDictObjectPtr
        if tp_flags & Py_TPFLAGS_BASE_EXC_SUBCLASS:
            return PyBaseExceptionObjectPtr
        # Py_TPFLAGS_TYPE_SUBCLASS is not mapped to PyTypeObjectPtr here (the
        # corresponding check is disabled).

        # Use the base class:
        return cls

    @classmethod
    def from_pyobject_ptr(cls, gdbval):
        '''
        Wrap "gdbval" in the most specific subclass that can be determined
        dynamically, casting the pointer to that subclass's struct type.
        '''
        try:
            generic = PyObjectPtr(gdbval)
            cls = cls.subclass_from_type(generic.type())
            return cls(gdbval, cast_to=cls.get_gdb_type())
        except RuntimeError:
            # Any kind of error, e.g. NULL ptrs: fall through and wrap the
            # value without casting it.
            pass
        return cls(gdbval)

    @classmethod
    def get_gdb_type(cls):
        '''Return the gdb.Type "pointer to <cls._typename>".'''
        struct_type = gdb.lookup_type(cls._typename)
        return struct_type.pointer()

    def as_address(self):
        '''Return the wrapped pointer as an integer address.'''
        return long(self._gdbval)
422
class PyVarObjectPtr(PyObjectPtr):
    """PyObjectPtr flavour whose gdb struct type is 'PyVarObject'."""
    _typename = 'PyVarObject'
425
class ProxyAlreadyVisited(object):
    '''
    Stand-in proxy returned when an object is met again during a traversal,
    which stops loops in the object graph from recursing forever.

    Analogous to the values emitted by the users of Py_ReprEnter and Py_ReprLeave
    '''
    def __init__(self, rep):
        # Text that repr() should produce verbatim, e.g. '[...]'.
        self._rep = rep

    def __repr__(self):
        return self._rep
438
439
def _write_instance_repr(out, visited, name, pyop_attrdict, address):
    '''Shared helper that writes "<name(attr=value, ...) at remote 0xADDR>"
    to the file-like object "out".

    The parenthesised attribute list is written only when "pyop_attrdict" is
    a PyDictObjectPtr.'''
    out.write('<')
    out.write(name)

    # Write dictionary of instance attributes:
    if isinstance(pyop_attrdict, PyDictObjectPtr):
        out.write('(')
        for index, (pyop_arg, pyop_val) in enumerate(pyop_attrdict.iteritems()):
            if index:
                out.write(', ')
            out.write(pyop_arg.proxyval(visited))
            out.write('=')
            pyop_val.write_repr(out, visited)
        out.write(')')
    out.write(' at remote 0x%x>' % address)
459
460
class InstanceProxy(object):
    """Proxy for a class instance: its class name, attributes and address."""

    def __init__(self, cl_name, attrdict, address):
        self.cl_name = cl_name
        self.attrdict = attrdict
        self.address = address

    def __repr__(self):
        # Without a real dict of attributes only name and address are shown.
        if not isinstance(self.attrdict, dict):
            return '<%s at remote 0x%x>' % (self.cl_name,
                                            self.address)

        pairs = ["%s=%r" % (arg, val)
                 for arg, val in self.attrdict.items()]
        kwargs = ', '.join(pairs)
        return '<%s(%s) at remote 0x%x>' % (self.cl_name,
                                            kwargs, self.address)
477
def _PyObject_VAR_SIZE(typeobj, nitems):
    """Return tp_basicsize + nitems * tp_itemsize for "typeobj", rounded up
    to a multiple of the pointer size and cast to size_t.

    The gdb type for size_t is looked up on first use and then kept as an
    attribute of this function.
    """
    size_t = _PyObject_VAR_SIZE._type_size_t
    if size_t is None:
        size_t = _PyObject_VAR_SIZE._type_size_t = gdb.lookup_type('size_t')

    basicsize = typeobj.field('tp_basicsize')
    itemsize = typeobj.field('tp_itemsize')
    align_mask = _sizeof_void_p() - 1

    unaligned = basicsize + nitems * itemsize + align_mask
    return (unaligned & ~align_mask).cast(size_t)
_PyObject_VAR_SIZE._type_size_t = None
488
class HeapTypeObjectPtr(PyObjectPtr):
    """
    Wrapper used for objects whose type has Py_TPFLAGS_HEAPTYPE set, shown as
    "<ClassName(attr=value, ...) at remote 0xADDR>".
    """
    _typename = 'PyObject'

    def get_attr_dict(self):
        '''
        Get the PyDictObject ptr representing the attribute dictionary
        (or None if there's a problem)

        The dictionary pointer is located through the type's tp_dictoffset.
        A negative offset is relative to the end of the object, so the
        object's size is computed from ob_size first.  An offset that ends up
        non-positive or not pointer-aligned is treated as corrupt data and
        yields None.
        '''
        try:
            typeobj = self.type()
            dictoffset = int_from_int(typeobj.field('tp_dictoffset'))
            if dictoffset != 0:
                if dictoffset < 0:
                    type_PyVarObject_ptr = gdb.lookup_type('PyVarObject').pointer()
                    tsize = int_from_int(self._gdbval.cast(type_PyVarObject_ptr)['ob_size'])
                    if tsize < 0:
                        tsize = -tsize
                    size = _PyObject_VAR_SIZE(typeobj, tsize)
                    dictoffset += size
                    # These values come from the inferior and may be corrupt.
                    # Fail safe explicitly rather than with "assert": an
                    # AssertionError would escape the handler below, and the
                    # check would vanish entirely under "python -O".
                    if not (dictoffset > 0 and
                            dictoffset % _sizeof_void_p() == 0):
                        return None

                # Step "dictoffset" bytes into the object and read the
                # PyObject* stored there.
                dictptr = self._gdbval.cast(_type_char_ptr()) + dictoffset
                PyObjectPtrPtr = PyObjectPtr.get_gdb_type().pointer()
                dictptr = dictptr.cast(PyObjectPtrPtr)
                return PyObjectPtr.from_pyobject_ptr(dictptr.dereference())
        except RuntimeError:
            # Corrupt data somewhere; fail safe
            pass

        # Not found, or some kind of error:
        return None

    def proxyval(self, visited):
        '''
        Support for classes.

        Currently we just locate the dictionary using a transliteration to
        python of _PyObject_GetDictPtr, ignoring descriptors

        Returns an InstanceProxy, or a ProxyAlreadyVisited placeholder if
        this object is already in "visited".
        '''
        # Guard against infinite loops:
        if self.as_address() in visited:
            return ProxyAlreadyVisited('<...>')
        visited.add(self.as_address())

        pyop_attr_dict = self.get_attr_dict()
        if pyop_attr_dict:
            attr_dict = pyop_attr_dict.proxyval(visited)
        else:
            attr_dict = {}
        tp_name = self.safe_tp_name()

        # Class:
        return InstanceProxy(tp_name, attr_dict, long(self._gdbval))

    def write_repr(self, out, visited):
        '''
        Write the instance representation to the file-like object "out",
        or '<...>' if this object is already in "visited".
        '''
        # Guard against infinite loops:
        if self.as_address() in visited:
            out.write('<...>')
            return
        visited.add(self.as_address())

        pyop_attrdict = self.get_attr_dict()
        _write_instance_repr(out, visited,
                             self.safe_tp_name(), pyop_attrdict, self.as_address())
554
class ProxyException(Exception):
    """Proxy for an exception instance: its type name plus its args."""

    def __init__(self, tp_name, args):
        self.tp_name = tp_name
        self.args = args

    def __repr__(self):
        # Rendered like a constructor call: the name followed by the args.
        parts = (self.tp_name, self.args)
        return '%s%r' % parts
562
class PyBaseExceptionObjectPtr(PyObjectPtr):
    """
    Wrapper for a gdb.Value that is a PyBaseExceptionObject*, i.e. an
    exception living in the process being debugged.
    """
    _typename = 'PyBaseExceptionObject'

    def proxyval(self, visited):
        '''Return a ProxyException built from the type name and "args".'''
        address = self.as_address()
        # Guard against infinite loops:
        if address in visited:
            return ProxyAlreadyVisited('(...)')
        visited.add(address)

        arg_proxy = self.pyop_field('args').proxyval(visited)
        tp_name = self.safe_tp_name()
        return ProxyException(tp_name, arg_proxy)

    def write_repr(self, out, visited):
        '''Write the type name immediately followed by the repr of "args".'''
        address = self.as_address()
        # Guard against infinite loops:
        if address in visited:
            out.write('(...)')
            return
        visited.add(address)

        out.write(self.safe_tp_name())
        self.write_field_repr('args', out, visited)
588
class PyClassObjectPtr(PyObjectPtr):
    """
    Wrapper for a gdb.Value that is a PyClassObject*, i.e. a <classobj>
    instance living in the process being debugged.
    """
    _typename = 'PyClassObject'
595
596
class BuiltInFunctionProxy(object):
    """Proxy for a built-in function, identified by its ml_name."""

    def __init__(self, ml_name):
        self.ml_name = ml_name

    def __repr__(self):
        name = self.ml_name
        return "<built-in function %s>" % name
603
class BuiltInMethodProxy(object):
    """Proxy for a built-in method bound to the object "pyop_m_self"."""

    def __init__(self, ml_name, pyop_m_self):
        self.ml_name = ml_name
        self.pyop_m_self = pyop_m_self

    def __repr__(self):
        bound_to = self.pyop_m_self
        details = (self.ml_name,
                   bound_to.safe_tp_name(),
                   bound_to.as_address())
        return '<built-in method %s of %s object at remote 0x%x>' % details
615
class PyCFunctionObjectPtr(PyObjectPtr):
    """
    Wrapper for a gdb.Value that is a PyCFunctionObject*
    (see Include/methodobject.h and Objects/methodobject.c)
    """
    _typename = 'PyCFunctionObject'

    def proxyval(self, visited):
        '''
        Return a BuiltInFunctionProxy when "m_self" is NULL, otherwise a
        BuiltInMethodProxy bound to "m_self".
        '''
        method_def = self.field('m_ml') # m_ml is a (PyMethodDef*)
        try:
            ml_name = method_def['ml_name'].string()
        except UnicodeDecodeError:
            # Keep going with a placeholder name when decoding fails.
            ml_name = '<ml_name:UnicodeDecodeError>'

        pyop_m_self = self.pyop_field('m_self')
        if not pyop_m_self.is_null():
            return BuiltInMethodProxy(ml_name, pyop_m_self)
        return BuiltInFunctionProxy(ml_name)
635
636
class PyCodeObjectPtr(PyObjectPtr):
    """
    Wrapper for a gdb.Value that is a PyCodeObject*, i.e. a <code> instance
    living in the process being debugged.
    """
    _typename = 'PyCodeObject'

    def addr2line(self, addrq):
        '''
        Return the line number that corresponds to bytecode offset "addrq".

        Analogous to PyCode_Addr2Line; translated from pseudocode in
        Objects/lnotab_notes.txt
        '''
        co_lnotab = self.pyop_field('co_lnotab').proxyval(set())

        # Start from co_firstlineno as PyCode_Addr2Line does, rather than
        # from 0 as lnotab_notes.txt has it:
        lineno = int_from_int(self.field('co_firstlineno'))

        # co_lnotab alternates (address increment, line increment) entries.
        # NOTE(review): line increments are treated as unsigned here --
        # confirm whether the inferior's lnotab can hold signed deltas.
        increments = zip(co_lnotab[::2], co_lnotab[1::2])
        offset = 0
        for addr_step, line_step in increments:
            offset += ord(addr_step)
            if offset > addrq:
                break
            lineno += ord(line_step)
        return lineno
664
665
class PyDictObjectPtr(PyObjectPtr):
    """
    Wrapper for a gdb.Value that is a PyDictObject*, i.e. a dict instance
    living in the process being debugged.
    """
    _typename = 'PyDictObject'

    def iteritems(self):
        '''
        Generate (PyObjectPtr key, PyObjectPtr value) pairs, analogous to
        dict.iteritems().  Entries whose value pointer is NULL are skipped.
        '''
        keys = self.field('ma_keys')
        values = self.field('ma_values')
        entries, nentries = self._get_entries(keys)
        for i in safe_range(nentries):
            ep = entries[i]
            # A non-NULL ma_values array holds the values; otherwise each
            # value sits in its entry.
            if long(values):
                raw_value = values[i]
            else:
                raw_value = ep['me_value']
            pyop_value = PyObjectPtr.from_pyobject_ptr(raw_value)
            if pyop_value.is_null():
                continue
            pyop_key = PyObjectPtr.from_pyobject_ptr(ep['me_key'])
            yield (pyop_key, pyop_value)

    def proxyval(self, visited):
        '''Return a dict mapping key proxies to value proxies.'''
        address = self.as_address()
        # Guard against infinite loops:
        if address in visited:
            return ProxyAlreadyVisited('{...}')
        visited.add(address)

        result = {}
        for pyop_key, pyop_value in self.iteritems():
            # The key is scraped before the value; "visited" is shared by
            # both, so the order is kept explicit.
            proxy_key = pyop_key.proxyval(visited)
            proxy_value = pyop_value.proxyval(visited)
            result[proxy_key] = proxy_value
        return result

    def write_repr(self, out, visited):
        '''Write "{key: value, ...}" to the file-like object "out".'''
        address = self.as_address()
        # Guard against infinite loops:
        if address in visited:
            out.write('{...}')
            return
        visited.add(address)

        out.write('{')
        for index, (pyop_key, pyop_value) in enumerate(self.iteritems()):
            if index:
                out.write(', ')
            pyop_key.write_repr(out, visited)
            out.write(': ')
            pyop_value.write_repr(out, visited)
        out.write('}')

    def _get_entries(self, keys):
        '''
        Return an (entries, count) pair for the keys object "keys", where
        "entries" can be indexed to reach each PyDictKeyEntry.
        '''
        dk_nentries = int(keys['dk_nentries'])
        dk_size = int(keys['dk_size'])
        try:
            # <= Python 3.5
            return keys['dk_entries'], dk_size
        except RuntimeError:
            # >= Python 3.6
            pass

        # The entries follow the dk_indices table, whose slots are 1, 2, 4
        # or 8 bytes wide depending on dk_size.
        if dk_size <= 0xFF:
            index_width = 1
        elif dk_size <= 0xFFFF:
            index_width = 2
        elif dk_size <= 0xFFFFFFFF:
            index_width = 4
        else:
            index_width = 8
        offset = index_width * dk_size

        indices_start = keys['dk_indices'].address
        entries_start = indices_start.cast(_type_unsigned_char_ptr()) + offset
        ent_ptr_t = gdb.lookup_type('PyDictKeyEntry').pointer()

        return entries_start.cast(ent_ptr_t), dk_nentries
747
748
class PyListObjectPtr(PyObjectPtr):
    """Wrapper for a gdb.Value that is a PyListObject*."""
    _typename = 'PyListObject'

    def __getitem__(self, i):
        # Get the gdb.Value for the (PyObject*) with the given index:
        return self.field('ob_item')[i]

    def proxyval(self, visited):
        '''Return a list holding the proxy of each element.'''
        address = self.as_address()
        # Guard against infinite loops:
        if address in visited:
            return ProxyAlreadyVisited('[...]')
        visited.add(address)

        length = int_from_int(self.field('ob_size'))
        result = []
        for i in safe_range(length):
            element = PyObjectPtr.from_pyobject_ptr(self[i])
            result.append(element.proxyval(visited))
        return result

    def write_repr(self, out, visited):
        '''Write "[elem, elem, ...]" to the file-like object "out".'''
        address = self.as_address()
        # Guard against infinite loops:
        if address in visited:
            out.write('[...]')
            return
        visited.add(address)

        out.write('[')
        length = int_from_int(self.field('ob_size'))
        for i in safe_range(length):
            if i > 0:
                out.write(', ')
            PyObjectPtr.from_pyobject_ptr(self[i]).write_repr(out, visited)
        out.write(']')
781
class PyLongObjectPtr(PyObjectPtr):
    """Wrapper for a gdb.Value that is a PyLongObject*."""
    _typename = 'PyLongObject'

    def proxyval(self, visited):
        '''
        Return the integer value stored in the object.

        Python's Include/longobjrep.h has this declaration:
           struct _longobject {
               PyObject_VAR_HEAD
               digit ob_digit[1];
           };

        with this description:
            The absolute value of a number is equal to
                 SUM(for i=0 through abs(ob_size)-1) ob_digit[i] * 2**(SHIFT*i)
            Negative numbers are represented with ob_size < 0;
            zero is represented by ob_size == 0.

        where SHIFT can be either:
            #define PyLong_SHIFT        30
            #define PyLong_SHIFT        15
        '''
        ob_size = long(self.field('ob_size'))
        if ob_size == 0:
            return 0

        ob_digit = self.field('ob_digit')

        # A two-byte "digit" type means 15-bit digits, otherwise 30-bit ones.
        shift = 15 if gdb.lookup_type('digit').sizeof == 2 else 30

        magnitude = 0
        for i in safe_range(abs(ob_size)):
            magnitude += long(ob_digit[i]) << (shift * i)

        if ob_size < 0:
            return -magnitude
        return magnitude

    def write_repr(self, out, visited):
        '''Write the value as a Python 3 int literal (no "L" suffix).'''
        out.write("%s" % self.proxyval(visited))
825
826
class PyBoolObjectPtr(PyLongObjectPtr):
    """
    Wrapper for a gdb.Value that is a PyBoolObject*, i.e. one of the two
    <bool> instances (Py_True/Py_False) in the process being debugged.
    """
    def proxyval(self, visited):
        '''Return True or False according to the underlying integer value.'''
        return bool(PyLongObjectPtr.proxyval(self, visited))
837
class PyNoneStructPtr(PyObjectPtr):
    """
    Wrapper for a gdb.Value that is a PyObject* pointing to the
    singleton (we hope) _Py_NoneStruct with ob_type PyNone_Type
    """
    _typename = 'PyObject'

    def proxyval(self, visited):
        '''The proxy for the inferior's None is simply None.'''
        return None
847
848
class PyFrameObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyFrameObject*, exposing the frame's
    code object, line information and variables.
    """
    _typename = 'PyFrameObject'

    def __init__(self, gdbval, cast_to=None):
        PyObjectPtr.__init__(self, gdbval, cast_to)

        if self.is_optimized_out():
            # Nothing can be read from the frame, so none of the cached
            # attributes below get set.
            return

        code = PyCodeObjectPtr.from_pyobject_ptr(self.field('f_code'))
        self.co = code
        self.co_name = code.pyop_field('co_name')
        self.co_filename = code.pyop_field('co_filename')

        self.f_lineno = int_from_int(self.field('f_lineno'))
        self.f_lasti = int_from_int(self.field('f_lasti'))
        self.co_nlocals = int_from_int(code.field('co_nlocals'))
        self.co_varnames = PyTupleObjectPtr.from_pyobject_ptr(code.field('co_varnames'))

    def iter_locals(self):
        '''
        Generate (name, value) pairs of PyObjectPtr instances for the local
        variables of this frame, skipping slots whose value is NULL.
        Generates nothing if the frame is optimized out.
        '''
        if self.is_optimized_out():
            return

        slots = self.field('f_localsplus')
        for index in safe_range(self.co_nlocals):
            value = PyObjectPtr.from_pyobject_ptr(slots[index])
            if value.is_null():
                continue
            name = PyObjectPtr.from_pyobject_ptr(self.co_varnames[index])
            yield (name, value)

    def iter_globals(self):
        '''
        Return an iterable of (name, value) pairs of PyObjectPtr instances
        for the global variables of this frame (empty if the frame is
        optimized out)
        '''
        if self.is_optimized_out():
            return ()

        return self.pyop_field('f_globals').iteritems()

    def iter_builtins(self):
        '''
        Return an iterable of (name, value) pairs of PyObjectPtr instances
        for the builtin variables (empty if the frame is optimized out)
        '''
        if self.is_optimized_out():
            return ()

        return self.pyop_field('f_builtins').iteritems()

    def get_var_by_name(self, name):
        '''
        Search the locals, then the globals, then the builtins for a variable
        called `name`, returning a (PyObjectPtr, scope) pair where scope is
        one of the strings 'local', 'global', 'builtin'.

        If not found, return (None, None)
        '''
        # Scopes are listed in lookup order; each iterator is only created
        # once the previous scopes have been exhausted.
        scopes = (
            ('local', self.iter_locals),
            ('global', self.iter_globals),
            ('builtin', self.iter_builtins),
        )
        for scope, iter_scope in scopes:
            for pyop_name, pyop_value in iter_scope():
                if name == pyop_name.proxyval(set()):
                    return pyop_value, scope
        return None, None

    def filename(self):
        '''Get the path of the current Python source file, as a string'''
        if not self.is_optimized_out():
            return self.co_filename.proxyval(set())
        return FRAME_INFO_OPTIMIZED_OUT

    def current_line_num(self):
        '''Get current line number as an integer (1-based), or None if it
        cannot be determined.

        Translated from PyFrame_GetLineNumber and PyCode_Addr2Line

        See Objects/lnotab_notes.txt
        '''
        if self.is_optimized_out():
            return None

        if long(self.field('f_trace')) != 0:
            # we have a non-NULL f_trace:
            return self.f_lineno

        try:
            return self.co.addr2line(self.f_lasti)
        except Exception:
            # bpo-34989: addr2line() is a complex function, it can fail in many
            # ways. For example, it fails with a TypeError on "FakeRepr" if
            # gdb fails to load debug symbols. Use a catch-all "except
            # Exception" to make the whole function safe. The caller has to
            # handle None anyway for optimized Python.
            return None

    def current_line(self):
        '''Get the text of the current source line as a string, with a trailing
        newline character.

        Returns None if the source file cannot be opened or has no such line.
        '''
        if self.is_optimized_out():
            return FRAME_INFO_OPTIMIZED_OUT

        line_number = self.current_line_num()
        if line_number is None:
            return '(failed to get frame line number)'

        source_path = self.filename()
        try:
            with open(os_fsencode(source_path), 'r') as fp:
                source_lines = fp.readlines()
        except IOError:
            return None

        # Convert from 1-based current_line_num to 0-based list offset
        offset = line_number - 1
        try:
            return source_lines[offset]
        except IndexError:
            return None

    def write_repr(self, out, visited):
        '''Write a one-line description of the frame (address, file, line,
        function name and local variables) to `out`.'''
        if self.is_optimized_out():
            out.write(FRAME_INFO_OPTIMIZED_OUT)
            return

        line_number = self.current_line_num()
        line_text = "?" if line_number is None else str(line_number)
        header = ('Frame 0x%x, for file %s, line %s, in %s ('
                  % (self.as_address(),
                     self.co_filename.proxyval(visited),
                     line_text,
                     self.co_name.proxyval(visited)))
        out.write(header)

        for position, (pyop_name, pyop_value) in enumerate(self.iter_locals()):
            if position:
                out.write(', ')

            out.write(pyop_name.proxyval(visited))
            out.write('=')
            pyop_value.write_repr(out, visited)

        out.write(')')

    def print_traceback(self):
        '''Write a traceback-style 'File "...", line ..., in ...' entry for
        this frame to sys.stdout.'''
        if self.is_optimized_out():
            sys.stdout.write('  %s\n' % FRAME_INFO_OPTIMIZED_OUT)
            return

        visited = set()
        line_number = self.current_line_num()
        line_text = "?" if line_number is None else str(line_number)
        entry = ('  File "%s", line %s, in %s\n'
                 % (self.co_filename.proxyval(visited),
                    line_text,
                    self.co_name.proxyval(visited)))
        sys.stdout.write(entry)
1007
class PySetObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PySetObject* within the process
    being debugged.
    """
    _typename = 'PySetObject'

    @classmethod
    def _dummy_key(cls):
        """Return the value of the global symbol _PySet_Dummy."""
        return gdb.lookup_global_symbol('_PySet_Dummy').value()

    def __iter__(self):
        """Generate a PyObjectPtr for each key in the table, skipping entries
        whose key is NULL or the dummy key."""
        dummy = self._dummy_key()
        entries = self.field('table')
        for index in safe_range(self.field('mask') + 1):
            key = entries[index]['key']
            if key == 0 or key == dummy:
                continue
            yield PyObjectPtr.from_pyobject_ptr(key)

    def proxyval(self, visited):
        """Return a set (or frozenset, if tp_name is 'frozenset') holding the
        proxy values of the members."""
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            return ProxyAlreadyVisited('%s(...)' % self.safe_tp_name())
        visited.add(address)

        factory = frozenset if self.safe_tp_name() == 'frozenset' else set
        return factory(key.proxyval(visited) for key in self)

    def write_repr(self, out, visited):
        """Write a Python 3 style repr of the set to `out`."""
        # Emulate Python 3's set_repr
        tp_name = self.safe_tp_name()

        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            out.write('(...)')
            return
        visited.add(address)

        # Python 3's set_repr special-cases the empty set:
        if not self.field('used'):
            out.write(tp_name)
            out.write('()')
            return

        # Python 3 uses {} for set literals; any other type name wraps the
        # literal in a call, e.g. frozenset({...}):
        needs_wrapper = (tp_name != 'set')
        if needs_wrapper:
            out.write(tp_name)
            out.write('(')

        out.write('{')
        for position, key in enumerate(self):
            if position:
                out.write(', ')
            key.write_repr(out, visited)
        out.write('}')

        if needs_wrapper:
            out.write(')')
1068
1069
class PyBytesObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyBytesObject* within the process
    being debugged.
    """
    _typename = 'PyBytesObject'

    def __str__(self):
        """Return a str with one character per byte of ob_sval."""
        length = self.field('ob_size')
        payload = self.field('ob_sval')
        char_ptr = payload.address.cast(_type_unsigned_char_ptr())
        return ''.join(chr(char_ptr[i]) for i in safe_range(length))

    def proxyval(self, visited):
        """Return the bytes content as a str (see __str__)."""
        return str(self)

    def write_repr(self, out, visited):
        """Write a Python 3 bytes literal, i.e. with a "b" prefix, to `out`."""
        # Get a PyStringObject* within the Python 2 gdb process:
        proxy = self.proxyval(visited)

        # Transliteration of Python 3's Objects/bytesobject.c:PyBytes_Repr
        # to Python 2 code:
        # Prefer single quotes, unless the data contains single quotes and
        # no double quotes.
        quote = '"' if ("'" in proxy and '"' not in proxy) else "'"
        named_escapes = {'\t': '\\t', '\n': '\\n', '\r': '\\r'}

        out.write('b')
        out.write(quote)
        for byte in proxy:
            if byte == quote or byte == '\\':
                out.write('\\')
                out.write(byte)
            elif byte in named_escapes:
                out.write(named_escapes[byte])
            elif byte < ' ' or ord(byte) >= 0x7f:
                # Everything else outside printable ASCII becomes \xhh
                code = ord(byte)
                out.write('\\x')
                out.write(hexdigits[(code >> 4) & 0xf])
                out.write(hexdigits[code & 0xf])
            else:
                out.write(byte)
        out.write(quote)
1112
class PyTupleObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyTupleObject* within the process
    being debugged.
    """
    _typename = 'PyTupleObject'

    def __getitem__(self, i):
        """Return the gdb.Value for the (PyObject*) with the given index."""
        return self.field('ob_item')[i]

    def proxyval(self, visited):
        """Return a tuple of the proxy values of the items."""
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            return ProxyAlreadyVisited('(...)')
        visited.add(address)

        size = int_from_int(self.field('ob_size'))
        return tuple(PyObjectPtr.from_pyobject_ptr(self[index]).proxyval(visited)
                     for index in safe_range(size))

    def write_repr(self, out, visited):
        """Write a tuple repr to `out`, with a trailing comma for 1-tuples."""
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            out.write('(...)')
            return
        visited.add(address)

        out.write('(')
        for index in safe_range(int_from_int(self.field('ob_size'))):
            if index > 0:
                out.write(', ')
            PyObjectPtr.from_pyobject_ptr(self[index]).write_repr(out, visited)

        closing = ',)' if self.field('ob_size') == 1 else ')'
        out.write(closing)
1148
class PyTypeObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyTypeObject* within the process
    being debugged.
    """
    _typename = 'PyTypeObject'
1151
1152
1153def _unichr_is_printable(char):
1154    # Logic adapted from Python 3's Tools/unicode/makeunicodedata.py
1155    if char == u" ":
1156        return True
1157    import unicodedata
1158    return unicodedata.category(char) not in ("C", "Z")
1159
if sys.maxunicode >= 0x10000:
    # unichr() in gdb's Python already covers every code point
    _unichr = unichr
else:
    # Needed for proper surrogate support if sizeof(Py_UNICODE) is 2 in gdb
    def _unichr(x):
        """Return the unicode string for code point `x`, encoding values
        of 0x10000 and above as a surrogate pair."""
        if x < 0x10000:
            return unichr(x)
        offset = x - 0x10000
        high = 0xD800 | (offset >> 10)
        low = 0xDC00 | (offset & 0x3FF)
        return unichr(high) + unichr(low)
1171
1172
class PyUnicodeObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyUnicodeObject* within the process
    being debugged.
    """
    _typename = 'PyUnicodeObject'

    def char_width(self):
        """Return the size reported by gdb for the inferior's Py_UNICODE type."""
        _type_Py_UNICODE = gdb.lookup_type('Py_UNICODE')
        return _type_Py_UNICODE.sizeof

    def proxyval(self, visited):
        """Return a unicode string built from the code points stored in the
        inferior's string object.

        Code points above 0x10ffff are replaced by U+FFFD.  `visited` is
        accepted for interface compatibility and is not consulted.
        """
        global _is_pep393
        if _is_pep393 is None:
            # Detect the string layout once: only the PEP 393 layout has a
            # 'data' field in PyUnicodeObject.
            fields = gdb.lookup_type('PyUnicodeObject').fields()
            _is_pep393 = 'data' in [f.name for f in fields]
        if _is_pep393:
            # Python 3.3 and newer
            may_have_surrogates = False
            compact = self.field('_base')
            ascii = compact['_base']
            state = ascii['state']
            is_compact_ascii = (int(state['ascii']) and int(state['compact']))
            if not int(state['ready']):
                # string is not ready
                field_length = long(compact['wstr_length'])
                may_have_surrogates = True
                field_str = ascii['wstr']
            else:
                field_length = long(ascii['length'])
                # Compact strings store their characters directly after the
                # corresponding header structure.
                if is_compact_ascii:
                    field_str = ascii.address + 1
                elif int(state['compact']):
                    field_str = compact.address + 1
                else:
                    field_str = self.field('data')['any']
                # 'kind' gives the number of bytes per character
                repr_kind = int(state['kind'])
                if repr_kind == 1:
                    field_str = field_str.cast(_type_unsigned_char_ptr())
                elif repr_kind == 2:
                    field_str = field_str.cast(_type_unsigned_short_ptr())
                elif repr_kind == 4:
                    field_str = field_str.cast(_type_unsigned_int_ptr())
        else:
            # Python 3.2 and earlier
            field_length = long(self.field('length'))
            field_str = self.field('str')
            may_have_surrogates = self.char_width() == 2

        # Gather a list of ints from the Py_UNICODE array; these are either
        # UCS-1, UCS-2 or UCS-4 code points:
        if not may_have_surrogates:
            Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)]
        else:
            # A more elaborate routine if sizeof(Py_UNICODE) is 2 in the
            # inferior process: we must join surrogate pairs.
            Py_UNICODEs = []
            i = 0
            limit = safety_limit(field_length)
            while i < limit:
                ucs = int(field_str[i])
                i += 1
                if ucs < 0xD800 or ucs >= 0xDC00 or i == field_length:
                    Py_UNICODEs.append(ucs)
                    continue
                # This could be a surrogate pair.
                ucs2 = int(field_str[i])
                if ucs2 < 0xDC00 or ucs2 > 0xDFFF:
                    continue
                code = (ucs & 0x03FF) << 10
                code |= ucs2 & 0x03FF
                code += 0x00010000
                Py_UNICODEs.append(code)
                i += 1

        # Convert the int code points to unicode characters, and generate a
        # local unicode instance.
        # This splits surrogate pairs if sizeof(Py_UNICODE) is 2 here (in gdb).
        # The replacement character is spelled with an explicit u'' prefix,
        # like the other unicode literals in this file, so that it is U+FFFD
        # rather than a literal backslash sequence under Python 2.
        result = u''.join([
            (_unichr(ucs) if ucs <= 0x10ffff else u'\ufffd')
            for ucs in Py_UNICODEs])
        return result

    def write_repr(self, out, visited):
        """Write a Python 3 str literal, i.e. without a "u" prefix, to `out`."""
        # Get a PyUnicodeObject* within the Python 2 gdb process:
        proxy = self.proxyval(visited)

        # Transliteration of Python 3's Object/unicodeobject.c:unicode_repr
        # to Python 2:
        if "'" in proxy and '"' not in proxy:
            quote = '"'
        else:
            quote = "'"
        out.write(quote)

        i = 0
        while i < len(proxy):
            ch = proxy[i]
            i += 1

            # Escape quotes and backslashes
            if ch == quote or ch == '\\':
                out.write('\\')
                out.write(ch)

            # Map special whitespace to '\t', '\n', '\r'
            elif ch == '\t':
                out.write('\\t')
            elif ch == '\n':
                out.write('\\n')
            elif ch == '\r':
                out.write('\\r')

            # Map non-printable US ASCII to '\xhh'.  DEL has to be compared
            # by code point: a character never equals the integer 0x7F.
            elif ch < ' ' or ord(ch) == 0x7F:
                out.write('\\x')
                out.write(hexdigits[(ord(ch) >> 4) & 0x000F])
                out.write(hexdigits[ord(ch) & 0x000F])

            # Copy ASCII characters as-is
            elif ord(ch) < 0x7F:
                out.write(ch)

            # Non-ASCII characters
            else:
                ucs = ch
                ch2 = None
                if sys.maxunicode < 0x10000:
                    # If sizeof(Py_UNICODE) is 2 here (in gdb), join
                    # surrogate pairs before calling _unichr_is_printable.
                    if (i < len(proxy)
                            and 0xD800 <= ord(ch) < 0xDC00
                            and 0xDC00 <= ord(proxy[i]) <= 0xDFFF):
                        ch2 = proxy[i]
                        ucs = ch + ch2
                        i += 1

                # Unfortunately, Python 2's unicode type doesn't seem
                # to expose the "isprintable" method
                printable = _unichr_is_printable(ucs)
                if printable:
                    # A character that cannot be encoded for output is
                    # escaped as well.
                    try:
                        ucs.encode(ENCODING)
                    except UnicodeEncodeError:
                        printable = False

                # Map Unicode whitespace and control characters
                # (categories Z* and C* except ASCII space)
                if not printable:
                    if ch2 is not None:
                        # Match Python 3's representation of non-printable
                        # wide characters.
                        code = (ord(ch) & 0x03FF) << 10
                        code |= ord(ch2) & 0x03FF
                        code += 0x00010000
                    else:
                        code = ord(ucs)

                    # Map 8-bit characters to '\\xhh'
                    if code <= 0xff:
                        out.write('\\x')
                        out.write(hexdigits[(code >> 4) & 0x000F])
                        out.write(hexdigits[code & 0x000F])
                    # Map 21-bit characters to '\U00xxxxxx'
                    elif code >= 0x10000:
                        out.write('\\U')
                        out.write(hexdigits[(code >> 28) & 0x0000000F])
                        out.write(hexdigits[(code >> 24) & 0x0000000F])
                        out.write(hexdigits[(code >> 20) & 0x0000000F])
                        out.write(hexdigits[(code >> 16) & 0x0000000F])
                        out.write(hexdigits[(code >> 12) & 0x0000000F])
                        out.write(hexdigits[(code >> 8) & 0x0000000F])
                        out.write(hexdigits[(code >> 4) & 0x0000000F])
                        out.write(hexdigits[code & 0x0000000F])
                    # Map 16-bit characters to '\uxxxx'
                    else:
                        out.write('\\u')
                        out.write(hexdigits[(code >> 12) & 0x000F])
                        out.write(hexdigits[(code >> 8) & 0x000F])
                        out.write(hexdigits[(code >> 4) & 0x000F])
                        out.write(hexdigits[code & 0x000F])
                else:
                    # Copy characters as-is
                    out.write(ch)
                    if ch2 is not None:
                        out.write(ch2)

        out.write(quote)
1359
1360
class wrapperobject(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a wrapperobject* within the process
    being debugged.
    """
    _typename = 'wrapperobject'

    def safe_name(self):
        """Return the repr of the wrapped descriptor's name, or a
        placeholder string if it cannot be read."""
        try:
            return repr(self.field('descr')['d_base']['name'].string())
        except (NullPyObjectPtr, RuntimeError, UnicodeDecodeError):
            return '<unknown name>'

    def safe_tp_name(self):
        """Return the tp_name of the type of 'self', or a placeholder string
        if it cannot be read."""
        try:
            return self.field('self')['ob_type']['tp_name'].string()
        except (NullPyObjectPtr, RuntimeError, UnicodeDecodeError):
            return '<unknown tp_name>'

    def safe_self_addresss(self):
        """Return the address of 'self' formatted as hex, or a placeholder
        string if it cannot be read."""
        try:
            return '%#x' % long(self.field('self'))
        except (NullPyObjectPtr, RuntimeError):
            return '<failed to get self address>'

    def proxyval(self, visited):
        """Return a "<method-wrapper ... of ... object at ...>" string."""
        details = (self.safe_name(),
                   self.safe_tp_name(),
                   self.safe_self_addresss())
        return "<method-wrapper %s of %s object at %s>" % details

    def write_repr(self, out, visited):
        """Write the proxy string to `out`."""
        out.write(self.proxyval(visited))
1394
1395
def int_from_int(gdbval):
    """Return `gdbval` converted with int()."""
    converted = int(gdbval)
    return converted
1398
1399
def stringify(val):
    """Return a string representation of `val` (currently its repr())."""
    # TODO: repr() puts everything on one line; pformat can be nicer, but
    # can lead to very long results; this function isolates the choice
    use_pformat = False
    if use_pformat:
        from pprint import pformat
        return pformat(val)
    return repr(val)
1408
1409
class PyObjectPtrPrinter:
    "Prints a (PyObject*)"

    def __init__ (self, gdbval):
        # The gdb.Value to be printed
        self.gdbval = gdbval

    def to_string (self):
        """Return a repr of the wrapped value, truncated to MAX_OUTPUT_LEN."""
        pyop = PyObjectPtr.from_pyobject_ptr(self.gdbval)
        # The alternative -- generating the full proxy value with
        # pyop.proxyval(set()) and passing it to stringify() -- could be
        # expensive, so the length-limited repr is used instead.
        return pyop.get_truncated_repr(MAX_OUTPUT_LEN)
1425
def pretty_printer_lookup(gdbval):
    """Return a PyObjectPtrPrinter for `gdbval` if it is a pointer to one
    of the recognised struct types; otherwise return None."""
    pointer_type = gdbval.type.unqualified()
    if pointer_type.code != gdb.TYPE_CODE_PTR:
        return None

    target_name = str(pointer_type.target().unqualified())
    recognised = ("PyObject", "PyFrameObject", "PyUnicodeObject", "wrapperobject")
    if target_name in recognised:
        return PyObjectPtrPrinter(gdbval)
    return None
1435
1436"""
1437During development, I've been manually invoking the code in this way:
1438(gdb) python
1439
1440import sys
1441sys.path.append('/home/david/coding/python-gdb')
1442import libpython
1443end
1444
1445then reloading it after each edit like this:
1446(gdb) python reload(libpython)
1447
1448The following code should ensure that the prettyprinter is registered
1449if the code is autoloaded by gdb when visiting libpython.so, provided
1450that this python file is installed to the same path as the library (or its
1451.debug file) plus a "-gdb.py" suffix, e.g:
1452  /usr/lib/libpython2.6.so.1.0-gdb.py
1453  /usr/lib/debug/usr/lib/libpython2.6.so.1.0.debug-gdb.py
1454"""
def register (obj):
    """Append pretty_printer_lookup to the pretty_printers list of `obj`,
    or of the gdb module itself when `obj` is None."""
    target = gdb if obj is None else obj

    # Wire up the pretty-printer
    target.pretty_printers.append(pretty_printer_lookup)

register (gdb.current_objfile ())
1463
1464
1465
1466# Unfortunately, the exact API exposed by the gdb module varies somewhat
1467# from build to build
1468# See http://bugs.python.org/issue8279?#msg102276
1469
class Frame(object):
    '''
    Wrapper for gdb.Frame, adding various methods
    '''
    def __init__(self, gdbframe):
        # The wrapped gdb.Frame
        self._gdbframe = gdbframe

    def older(self):
        '''Return a Frame wrapping the next older gdb frame, or None if
        there is none'''
        older = self._gdbframe.older()
        if older:
            return Frame(older)
        else:
            return None

    def newer(self):
        '''Return a Frame wrapping the next newer gdb frame, or None if
        there is none'''
        newer = self._gdbframe.newer()
        if newer:
            return Frame(newer)
        else:
            return None

    def select(self):
        '''If supported, select this frame and return True; return False if unsupported

        Not all builds have a gdb.Frame.select method; seems to be present on Fedora 12
        onwards, but absent on Ubuntu buildbot'''
        if not hasattr(self._gdbframe, 'select'):
            print ('Unable to select frame: '
                   'this build of gdb does not expose a gdb.Frame.select method')
            return False
        self._gdbframe.select()
        return True

    def get_index(self):
        '''Calculate index of frame, starting at 0 for the newest frame within
        this thread'''
        index = 0
        # Go down until you reach the newest frame, asking each frame for
        # its newer neighbour only once per step:
        newer = self.newer()
        while newer:
            index += 1
            newer = newer.newer()
        return index

    # We divide frames into:
    #   - "python frames":
    #       - "bytecode frames" i.e. PyEval_EvalFrameEx
    #       - "other python frames": things that are of interest from a python
    #         POV, but aren't bytecode (e.g. GC, GIL)
    #   - everything else

    def is_python_frame(self):
        '''Is this a _PyEval_EvalFrameDefault frame, or some other important
        frame? (see is_other_python_frame for what "important" means in this
        context)'''
        if self.is_evalframe():
            return True
        if self.is_other_python_frame():
            return True
        return False

    def is_evalframe(self):
        '''Is this a _PyEval_EvalFrameDefault frame?'''
        if self._gdbframe.name() == EVALFRAME:
            # I believe we also need to filter on the inline
            # struct frame_id.inline_depth, only regarding frames with
            # an inline depth of 0 as actually being this function
            #
            # So we reject those with type gdb.INLINE_FRAME
            if self._gdbframe.type() == gdb.NORMAL_FRAME:
                # We have a _PyEval_EvalFrameDefault frame:
                return True

        return False

    def is_other_python_frame(self):
        '''Is this frame worth displaying in python backtraces?
        Examples:
          - waiting on the GIL
          - garbage-collecting
          - within a CFunction
         If it is, return a descriptive string
         For other frames, return False
         '''
        if self.is_waiting_for_gil():
            return 'Waiting for the GIL'

        if self.is_gc_collect():
            return 'Garbage-collecting'

        # Detect invocations of PyCFunction instances:
        frame = self._gdbframe
        caller = frame.name()
        if not caller:
            return False

        if (caller.startswith('cfunction_vectorcall_') or
            caller == 'cfunction_call'):
            arg_name = 'func'
            # Within that frame:
            #   "func" is the local containing the PyObject* of the
            # PyCFunctionObject instance
            #   "f" is the same value, but cast to (PyCFunctionObject*)
            #   "self" is the (PyObject*) of the 'self'
            try:
                # Use the prettyprinter for the func:
                func = frame.read_var(arg_name)
                return str(func)
            except ValueError:
                return ('PyCFunction invocation (unable to read %s: '
                        'missing debuginfos?)' % arg_name)
            except RuntimeError:
                return 'PyCFunction invocation (unable to read %s)' % arg_name

        if caller == 'wrapper_call':
            arg_name = 'wp'
            try:
                func = frame.read_var(arg_name)
                return str(func)
            except ValueError:
                return ('<wrapper_call invocation (unable to read %s: '
                        'missing debuginfos?)>' % arg_name)
            except RuntimeError:
                return '<wrapper_call invocation (unable to read %s)>' % arg_name

        # This frame isn't worth reporting:
        return False

    def is_waiting_for_gil(self):
        '''Is this frame waiting on the GIL?

        Always returns a bool; a frame without a name is not waiting.'''
        # This assumes the _POSIX_THREADS version of Python/ceval_gil.h:
        return self._gdbframe.name() == 'take_gil'

    def is_gc_collect(self):
        '''Is this frame "collect" within the garbage-collector?'''
        return self._gdbframe.name() == 'collect'

    def get_pyop(self):
        '''Return the PyFrameObjectPtr for the "f" variable of this frame,
        falling back to the "f" of the calling frame if this one is
        optimized out.  Return None if reading "f" raises ValueError.'''
        try:
            f = self._gdbframe.read_var('f')
            frame = PyFrameObjectPtr.from_pyobject_ptr(f)
            if not frame.is_optimized_out():
                return frame
            # gdb is unable to get the "f" argument of PyEval_EvalFrameEx()
            # because it was "optimized out". Try to get "f" from the frame
            # of the caller, PyEval_EvalCodeEx().
            orig_frame = frame
            caller = self._gdbframe.older()
            if caller:
                f = caller.read_var('f')
                frame = PyFrameObjectPtr.from_pyobject_ptr(f)
                if not frame.is_optimized_out():
                    return frame
            return orig_frame
        except ValueError:
            return None

    @classmethod
    def get_selected_frame(cls):
        '''Return a Frame wrapping gdb's selected frame, or None'''
        _gdbframe = gdb.selected_frame()
        if _gdbframe:
            return Frame(_gdbframe)
        return None

    @classmethod
    def get_selected_python_frame(cls):
        '''Try to obtain the Frame for the python-related code in the selected
        frame, or None'''
        try:
            frame = cls.get_selected_frame()
        except gdb.error:
            # No frame: Python didn't start yet
            return None

        while frame:
            if frame.is_python_frame():
                return frame
            frame = frame.older()

        # Not found:
        return None

    @classmethod
    def get_selected_bytecode_frame(cls):
        '''Try to obtain the Frame for the python bytecode interpreter in the
        selected GDB frame, or None'''
        try:
            frame = cls.get_selected_frame()
        except gdb.error:
            # No frame: Python didn't start yet.  Report "not found", as
            # get_selected_python_frame does, instead of propagating.
            return None

        while frame:
            if frame.is_evalframe():
                return frame
            frame = frame.older()

        # Not found:
        return None

    def print_summary(self):
        '''Write a one-frame summary, prefixed by the frame index, to
        sys.stdout; bytecode frames also get their current source line.'''
        if self.is_evalframe():
            pyop = self.get_pyop()
            if pyop:
                line = pyop.get_truncated_repr(MAX_OUTPUT_LEN)
                write_unicode(sys.stdout, '#%i %s\n' % (self.get_index(), line))
                if not pyop.is_optimized_out():
                    line = pyop.current_line()
                    if line is not None:
                        sys.stdout.write('    %s\n' % line.strip())
            else:
                sys.stdout.write('#%i (unable to read python frame information)\n' % self.get_index())
        else:
            info = self.is_other_python_frame()
            if info:
                sys.stdout.write('#%i %s\n' % (self.get_index(), info))
            else:
                sys.stdout.write('#%i\n' % self.get_index())

    def print_traceback(self):
        '''Write a traceback-style entry for this frame to sys.stdout;
        bytecode frames also get their current source line.'''
        if self.is_evalframe():
            pyop = self.get_pyop()
            if pyop:
                pyop.print_traceback()
                if not pyop.is_optimized_out():
                    line = pyop.current_line()
                    if line is not None:
                        sys.stdout.write('    %s\n' % line.strip())
            else:
                sys.stdout.write('  (unable to read python frame information)\n')
        else:
            info = self.is_other_python_frame()
            if info:
                sys.stdout.write('  %s\n' % info)
            else:
                sys.stdout.write('  (not a python frame)\n')
1706
class PyList(gdb.Command):
    '''List the current Python source code, if any

    Use
       py-list START
    to list at a different line number within the python source.

    Use
       py-list START, END
    to list a specific range of lines within the python source.
    '''

    def __init__(self):
        # Instantiating the command makes "py-list" available within gdb
        gdb.Command.__init__ (self,
                              "py-list",
                              gdb.COMMAND_FILES,
                              gdb.COMPLETE_NONE)


    def invoke(self, args, from_tty):
        '''Print numbered source lines for the selected bytecode frame.

        args is the text following the command name.  It may be empty (list
        from 5 lines before to 5 lines after the current line), "START"
        (list START through START + 10) or "START, END" (list that closed
        range).  The line currently being executed is marked with ">".

        Diagnostics are printed, and nothing is listed, when no bytecode
        frame can be found, the frame information or line number cannot be
        read, or the source file cannot be opened.
        '''
        import re

        start = None
        end = None

        # A single pattern covers both "START" and "START, END".  The numbers
        # are taken from the capture groups rather than from the whole match,
        # which also contains the surrounding whitespace.
        m = re.match(r'\s*(\d+)\s*(?:,\s*(\d+)\s*)?', args)
        if m:
            start = int(m.group(1))
            if m.group(2) is not None:
                end = int(m.group(2))
            else:
                end = start + 10

        # py-list requires an actual PyEval_EvalFrameEx frame:
        frame = Frame.get_selected_bytecode_frame()
        if not frame:
            print('Unable to locate gdb frame for python bytecode interpreter')
            return

        pyop = frame.get_pyop()
        if not pyop or pyop.is_optimized_out():
            print(UNABLE_READ_INFO_PYTHON_FRAME)
            return

        filename = pyop.filename()
        lineno = pyop.current_line_num()
        if lineno is None:
            print('Unable to read python frame line number')
            return

        if start is None:
            # No explicit range: centre the listing on the current line
            start = lineno - 5
            end = lineno + 5

        # Line numbers are 1-based, so clamp anything earlier to line 1
        if start < 1:
            start = 1

        try:
            f = open(os_fsencode(filename), 'r')
        except IOError as err:
            sys.stdout.write('Unable to open %s: %s\n'
                             % (filename, err))
            return
        with f:
            all_lines = f.readlines()
            # start and end are 1-based, all_lines is 0-based;
            # so [start-1:end] as a python slice gives us [start, end] as a
            # closed interval
            for num, line in enumerate(all_lines[start-1:end], start):
                linestr = str(num)
                # Highlight current line:
                if num == lineno:
                    linestr = '>' + linestr
                sys.stdout.write('%4s    %s' % (linestr, line))


# ...and register the command:
PyList()
1786
def move_in_stack(move_up):
    '''Select the neighbouring python frame (for py-up/py-down).

    Starting from the selected python frame, step towards older frames when
    move_up is true and towards newer frames otherwise.  The first frame
    found for which is_python_frame() is true is selected, and its summary is
    printed if the selection succeeded.  A message is printed instead when
    there is no selected python frame, or when no further python frame exists
    in the requested direction.
    '''
    current = Frame.get_selected_python_frame()
    if not current:
        print('Unable to locate python frame')
        return

    while current:
        neighbour = current.older() if move_up else current.newer()

        if not neighbour:
            # Reached the end of the stack in this direction
            break

        if neighbour.is_python_frame():
            # Found it; only print the summary if gdb accepted the selection
            if neighbour.select():
                neighbour.print_summary()
            return

        current = neighbour

    if move_up:
        failure = 'Unable to find an older python frame'
    else:
        failure = 'Unable to find a newer python frame'
    print(failure)
1815
class PyUp(gdb.Command):
    'Select and print the python stack frame that called this one (if any)'

    def __init__(self):
        # "py-up" is a stack command that offers no argument completion
        gdb.Command.__init__(self, "py-up", gdb.COMMAND_STACK, gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        '''Handle py-up; args and from_tty are not used.'''
        # Moving "up" means moving towards older (calling) frames
        move_in_stack(True)
1827
class PyDown(gdb.Command):
    'Select and print the python stack frame called by this one (if any)'

    def __init__(self):
        # "py-down" is a stack command that offers no argument completion
        gdb.Command.__init__(self, "py-down", gdb.COMMAND_STACK, gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        '''Handle py-down; args and from_tty are not used.'''
        # Moving "down" means moving towards newer (called) frames
        move_in_stack(False)
1839
# Not all builds of gdb have gdb.Frame.select, so the py-up and py-down
# commands are only registered when that method is available:
if hasattr(gdb.Frame, 'select'):
    PyUp()
    PyDown()
1844
class PyBacktraceFull(gdb.Command):
    'Display the current python frame and all the frames within its call stack (if any)'

    def __init__(self):
        # "py-bt-full" is a stack command that offers no argument completion
        gdb.Command.__init__(self, "py-bt-full", gdb.COMMAND_STACK, gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        '''Handle py-bt-full; args and from_tty are not used.

        Starting at the selected python frame and moving through older
        frames, print the summary of every frame for which is_python_frame()
        is true.
        '''
        current = Frame.get_selected_python_frame()
        if not current:
            print('Unable to locate python frame')
            return

        while current:
            # Frames that are not python frames are stepped over silently
            if current.is_python_frame():
                current.print_summary()
            current = current.older()

PyBacktraceFull()
1866
class PyBacktrace(gdb.Command):
    'Display the current python frame and all the frames within its call stack (if any)'

    def __init__(self):
        # "py-bt" is a stack command that offers no argument completion
        gdb.Command.__init__(self, "py-bt", gdb.COMMAND_STACK, gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        '''Handle py-bt; args and from_tty are not used.

        Write a traceback-style header, then, starting at the selected python
        frame and moving through older frames, print the traceback entry of
        every frame for which is_python_frame() is true.
        '''
        current = Frame.get_selected_python_frame()
        if not current:
            print('Unable to locate python frame')
            return

        sys.stdout.write('Traceback (most recent call first):\n')
        while current:
            # Frames that are not python frames are stepped over silently
            if current.is_python_frame():
                current.print_traceback()
            current = current.older()

PyBacktrace()
1889
class PyPrint(gdb.Command):
    'Look up the given python variable name, and print it'

    def __init__(self):
        # "py-print" is a data command that offers no argument completion
        gdb.Command.__init__(self, "py-print", gdb.COMMAND_DATA, gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        '''Handle py-print; args is used as the variable name to look up.

        The name is resolved through get_var_by_name() on the frame object of
        the selected python frame.  On success the scope, the name and the
        truncated repr of the value are printed; otherwise a "not found"
        message is printed.  A diagnostic is printed if no python frame is
        selected or its frame object cannot be read.
        '''
        var_name = str(args)

        gdb_frame = Frame.get_selected_python_frame()
        if not gdb_frame:
            print('Unable to locate python frame')
            return

        frame_obj = gdb_frame.get_pyop()
        if not frame_obj:
            print(UNABLE_READ_INFO_PYTHON_FRAME)
            return

        value, scope = frame_obj.get_var_by_name(var_name)
        if not value:
            print('%r not found' % var_name)
            return

        shown = value.get_truncated_repr(MAX_OUTPUT_LEN)
        print('%s %r = %s' % (scope, var_name, shown))

PyPrint()
1923
class PyLocals(gdb.Command):
    'Print the names and values of the local variables of the selected python frame (if any)'
    def __init__(self):
        # Instantiating the command makes "py-locals" available within gdb
        gdb.Command.__init__ (self,
                              "py-locals",
                              gdb.COMMAND_DATA,
                              gdb.COMPLETE_NONE)


    def invoke(self, args, from_tty):
        '''Handle py-locals; args and from_tty are not used.

        Print one "name = value" line for each pair yielded by iter_locals()
        on the frame object of the selected python frame, with each value
        rendered as a truncated repr.  A diagnostic is printed instead if no
        python frame is selected or its frame object cannot be read.
        '''
        frame = Frame.get_selected_python_frame()
        if not frame:
            print('Unable to locate python frame')
            return

        pyop_frame = frame.get_pyop()
        if not pyop_frame:
            print(UNABLE_READ_INFO_PYTHON_FRAME)
            return

        for pyop_name, pyop_value in pyop_frame.iter_locals():
            # A fresh "visited" set is passed for each name
            print('%s = %s'
                   % (pyop_name.proxyval(set()),
                      pyop_value.get_truncated_repr(MAX_OUTPUT_LEN)))

PyLocals()
1952