• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
"""
LLDB AppKit formatters

Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
See https://llvm.org/LICENSE.txt for license information.
SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
"""
# Example synthetic children and summary providers for CFString (and the
# related NSString class).
# The real code is part of the LLDB core.
10import lldb
11import lldb.runtime.objc.objc_runtime
12import lldb.formatters.Logger
13
# Compatibility shim: probe for the ``unichr`` builtin and, when the probe
# raises NameError (the name does not exist on this interpreter), bind
# ``unichr`` to ``chr`` so the rest of the module can keep calling ``unichr``.
try:
    unichr
except NameError:
    unichr = chr
18
def CFString_SummaryProvider(valobj, dict):
    """Return the one-line summary for an NSString / CFString value.

    The string data is obtained from the "content" child of a
    CFStringSynthProvider built on ``valobj``.

    Args:
        valobj: the lldb.SBValue being summarized.
        dict: the script-interpreter dictionary handed over by LLDB; it is
            only forwarded to the synthetic provider.

    Returns:
        ``'@'`` followed by the quoted string, ``'@<variable is not NSString>'``
        when the content cannot be produced, or ``''`` when the provider
        could not read the object's info bits.
    """
    logger = lldb.formatters.Logger.Logger()
    provider = CFStringSynthProvider(valobj, dict)
    if provider.invalid:
        return ''
    try:
        content = provider.get_child_at_index(
            provider.get_child_index("content"))
        if isinstance(content, lldb.SBValue):
            summary = content.GetSummary()
        else:
            # Some readers return the text itself instead of an SBValue.  On
            # Python 3 that text may arrive as UTF-8 bytes, which cannot be
            # concatenated with str; decode it first.  (On Python 2 bytes is
            # str, so the check below is a no-op there.)
            if isinstance(content, bytes) and not isinstance(content, str):
                content = content.decode('utf-8', 'replace')
            summary = '"' + content + '"'
    except Exception:
        # Best effort: any failure while reading the target is reported as
        # "not an NSString" rather than surfacing a traceback in the debugger.
        summary = None
    if summary is None:
        summary = '<variable is not NSString>'
    return '@' + summary
36
37
def CFAttributedString_SummaryProvider(valobj, dict):
    """Return the one-line summary for an NSAttributedString value.

    The pointer stored one pointer-size past the start of the object is
    followed to reach the backing string, which is then read through a
    CFStringSynthProvider.

    Args:
        valobj: the lldb.SBValue being summarized.
        dict: the script-interpreter dictionary handed over by LLDB; it is
            only forwarded to the synthetic provider.

    Returns:
        ``'@'`` followed by the string summary, or
        ``'@<variable is not NSAttributedString>'`` when the value is null or
        the backing string cannot be read.
    """
    logger = lldb.formatters.Logger.Logger()
    not_attributed = '<variable is not NSAttributedString>'
    offset = valobj.GetTarget().GetProcess().GetAddressByteSize()
    pointee = valobj.GetValueAsUnsigned(0)
    summary = None
    if pointee:
        # Skip the first pointer-sized field to reach the string pointer.
        pointee = pointee + offset
        child_ptr = valobj.CreateValueFromAddress(
            "string_ptr", pointee, valobj.GetType())
        child = child_ptr.CreateValueFromAddress(
            "string_data",
            child_ptr.GetValueAsUnsigned(),
            valobj.GetType()).AddressOf()
        provider = CFStringSynthProvider(child, dict)
        if not provider.invalid:
            try:
                content = provider.get_child_at_index(
                    provider.get_child_index("content"))
                if isinstance(content, lldb.SBValue):
                    summary = content.GetSummary()
                elif content is not None:
                    # Some readers return the text itself (possibly as UTF-8
                    # bytes on Python 3) instead of an SBValue.
                    if isinstance(content, bytes) and \
                            not isinstance(content, str):
                        content = content.decode('utf-8', 'replace')
                    summary = '"' + content + '"'
            except Exception:
                # Best effort: report the generic message instead of a
                # traceback when the target memory cannot be interpreted.
                summary = None
    if summary is None:
        summary = not_attributed
    return '@' + summary
61
62
def __lldb_init_module(debugger, dict):
    """Register this module's summary functions with the given debugger.

    Two ``type summary add -F`` commands are issued through
    ``debugger.HandleCommand``: one binding CFString_SummaryProvider to
    NSString, CFStringRef and CFMutableStringRef, and one binding
    CFAttributedString_SummaryProvider to NSAttributedString.

    Args:
        debugger: object whose ``HandleCommand`` method runs an LLDB command.
        dict: unused here.
    """
    commands = (
        "type summary add -F CFString.CFString_SummaryProvider NSString CFStringRef CFMutableStringRef",
        "type summary add -F CFString.CFAttributedString_SummaryProvider NSAttributedString",
    )
    for command in commands:
        debugger.HandleCommand(command)
68
69
class CFStringSynthProvider:
    """Synthetic children provider that exposes the contents of a CFString.

    The provider reads the info bits stored in the object's CFRuntimeBase
    header, derives boolean flags from them and uses those flags to pick a
    reader for the string data.  Six children are published, in this order:
    ``mutable``, ``inline``, ``explicit``, ``unicode``, ``special`` and
    ``content``.  Children other than ``content`` are for debugging only and
    must not be used in production code.

    After construction ``self.invalid`` tells whether the info bits could be
    read; the flag attributes only exist when it is False.
    """

    def __init__(self, valobj, dict):
        """Remember ``valobj`` and immediately compute the flags.

        Args:
            valobj: the lldb.SBValue for the string object.
            dict: the script-interpreter dictionary passed by LLDB (unused).
        """
        logger = lldb.formatters.Logger.Logger()
        self.valobj = valobj
        self.update()

    # ------------------------------------------------------------------
    # small pure helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _escape_c_string(text):
        """Escape ``text`` for use inside a double-quoted C string literal."""
        replacements = (
            ('\\', '\\\\'),  # must run first so later escapes stay intact
            ('"', '\\"'),
            ('\n', '\\n'),
            ('\r', '\\r'),
            ('\t', '\\t'),
        )
        for raw, escaped in replacements:
            text = text.replace(raw, escaped)
        return text

    @staticmethod
    def _decode_utf16(raw, little_endian):
        """Decode a buffer of UTF-16 code units, pairing surrogates.

        Undecodable sequences are replaced rather than raising.
        """
        codec = 'utf-16-le' if little_endian else 'utf-16-be'
        return bytearray(raw).decode(codec, 'replace')

    @staticmethod
    def _to_native_str(text):
        """Return ``text`` as the interpreter's ``str`` type.

        On Python 3 the text already is ``str``; on Python 2 a ``unicode``
        object is encoded to a UTF-8 ``str``.
        """
        if isinstance(text, str):
            return text
        return text.encode('utf-8')

    def _make_cstring_value(self, text):
        """Build the "content" child as a ``(char*)"..."`` expression."""
        literal = self._escape_c_string(self._to_native_str(text))
        return self.valobj.CreateValueFromExpression(
            "content", '(char*)"' + literal + '"')

    # ------------------------------------------------------------------
    # synthetic children protocol
    # ------------------------------------------------------------------

    # children other than "content" are for debugging only and must not be
    # used in production code
    def num_children(self):
        """Return 6 for a readable object and 0 for an invalid one."""
        logger = lldb.formatters.Logger.Logger()
        if self.invalid:
            return 0
        return 6

    def read_unicode(self, pointer, max_len=2048):
        """Read a NUL-terminated UTF-16 string from the target's memory.

        Args:
            pointer: address of the first code unit.
            max_len: maximum number of 16-bit code units (not bytes) to read.

        Returns:
            The decoded text (``unicode`` on Python 2, ``str`` on Python 3).

        Raises:
            RuntimeError: when the target memory cannot be read.
        """
        logger = lldb.formatters.Logger.Logger()
        process = self.valobj.GetTarget().GetProcess()
        error = lldb.SBError()
        raw = bytearray()
        # cannot do the read at once because the length value has
        # a weird encoding. better play it safe here
        while max_len > 0:
            content = process.ReadMemory(pointer, 2, error)
            if error.Fail() or content is None or len(content) < 2:
                raise RuntimeError(
                    "unable to read string data at 0x%x" % pointer)
            unit = bytearray(content)
            if unit[0] == 0 and unit[1] == 0:
                break
            raw += unit
            pointer = pointer + 2
            # read max_len unicode values, not max_len bytes
            max_len = max_len - 1
        # byte order follows the target's endianness
        # (do we really need this or is Cocoa going to
        #  use Windows-compatible little-endian even
        #  if the target is big endian?)
        return self._decode_utf16(raw, self.is_little)

    # handle the special case strings
    # only use the custom code for the tested LP64 case
    def handle_special(self):
        """Read a "special" string (info bits equal to zero).

        Non-64-bit targets go through handle_unicode_string_safe(); on 64-bit
        targets UTF-16 data is read from a hard-coded offset of 12 bytes.
        """
        logger = lldb.formatters.Logger.Logger()
        if not self.is_64_bit:
            # for 32bit targets, use safe ObjC code
            return self.handle_unicode_string_safe()
        offset = 12
        pointer = self.valobj.GetValueAsUnsigned(0) + offset
        return self._make_cstring_value(self.read_unicode(pointer))

    # last resort call, use ObjC code to read; the final aim is to
    # be able to strip this call away entirely and only do the read
    # ourselves
    def handle_unicode_string_safe(self):
        """Build the content child from the value's object description.

        Returns None when no object description is available.
        """
        description = self.valobj.GetObjectDescription()
        if description is None:
            return None
        return self._make_cstring_value(description)

    def handle_unicode_string(self):
        """Read the UTF-16 payload of a string whose unicode flag is set.

        Returns the text as a native ``str``, or an SBValue when one of the
        fallback paths is taken.
        """
        logger = lldb.formatters.Logger.Logger()
        # step 1: find offset
        base = self.valobj.GetValueAsUnsigned(
            0) + self.size_of_cfruntime_base()
        if self.inline:
            if not self.explicit:
                # untested, use the safe code path
                return self.handle_unicode_string_safe()
            # a full pointer is skipped here before getting to the live
            # data
            pointer = base + self.pointer_size
        else:
            # read a pointer-sized value here and make an address out of it
            try:
                char_type = self.valobj.GetType().GetBasicType(
                    lldb.eBasicTypeChar).GetPointerType()
                vopointer = self.valobj.CreateValueFromAddress(
                    "dummy", base, char_type)
                pointer = vopointer.GetValueAsUnsigned(0)
            except Exception:
                return self._make_cstring_value('@"invalid NSString"')
        # step 2: read Unicode data at pointer
        # step 3: return it
        return self._to_native_str(self.read_unicode(pointer))

    def handle_inline_explicit(self):
        """Return the content of an inline, explicit-length, 8-bit string.

        The data is taken to start three pointer sizes into the object.
        """
        logger = lldb.formatters.Logger.Logger()
        address = 3 * self.pointer_size + self.valobj.GetValueAsUnsigned(0)
        return self.valobj.CreateValueFromExpression(
            "content", "(char*)(" + str(address) + ")")

    def handle_mutable_string(self):
        """Return the content of a mutable string.

        The data pointer is read two pointer sizes into the object.  It is
        decoded as UTF-16 when both the explicit and unicode flags are set;
        otherwise one byte is skipped and the rest is exposed as a ``char*``.
        """
        logger = lldb.formatters.Logger.Logger()
        offset = 2 * self.pointer_size
        data = self.valobj.CreateChildAtOffset(
            "content", offset, self.valobj.GetType().GetBasicType(
                lldb.eBasicTypeChar).GetPointerType())
        data_value = data.GetValueAsUnsigned(0)
        if self.explicit and self.unicode:
            return self._to_native_str(self.read_unicode(data_value))
        return self.valobj.CreateValueFromExpression(
            "content", "(char*)(" + str(data_value + 1) + ")")

    def handle_UTF8_inline(self):
        """Return the content of an inline 8-bit string.

        The data follows the CFRuntimeBase header; one extra byte is skipped
        when the explicit flag is False.
        """
        logger = lldb.formatters.Logger.Logger()
        address = self.valobj.GetValueAsUnsigned(
            0) + self.size_of_cfruntime_base()
        if not self.explicit:
            address = address + 1
        char_type = self.valobj.GetType().GetBasicType(lldb.eBasicTypeChar)
        return self.valobj.CreateValueFromAddress(
            "content", address, char_type).AddressOf()

    def handle_UTF8_not_inline(self):
        """Return the ``char*`` stored right after the CFRuntimeBase header."""
        logger = lldb.formatters.Logger.Logger()
        char_ptr_type = self.valobj.GetType().GetBasicType(
            lldb.eBasicTypeChar).GetPointerType()
        return self.valobj.CreateChildAtOffset(
            "content", self.size_of_cfruntime_base(), char_ptr_type)

    def get_child_at_index(self, index):
        """Return the child at ``index``.

        Indexes 0-4 are the debugging flags, index 5 is the string content.
        Returns None for any other index or when the provider is invalid.
        """
        logger = lldb.formatters.Logger.Logger()
        logger >> "Querying for child [" + str(index) + "]"
        if self.invalid:
            # the flags were never computed, there is nothing to report
            return None
        flag_children = {
            0: ("mutable", self.mutable),
            1: ("inline", self.inline),
            2: ("explicit", self.explicit),
            3: ("unicode", self.unicode),
            4: ("special", self.special),
        }
        entry = flag_children.get(index)
        if entry is not None:
            name, flag = entry
            return self.valobj.CreateValueFromExpression(name, str(int(flag)))
        if index != 5:
            return None
        # we are handling the several possible combinations of flags.
        # for each known combination we have a function that knows how to
        # go fetch the data from memory instead of running code. if a string is
        # not correctly displayed, one should start by finding a combination of
        # flags that makes it different from these known cases, and provide a
        # new reader function. if this is not possible, a new flag might have
        # to be made up (like the "special" flag, which is not a real flag in
        # CFString), or alternatively one might need to use the ObjC runtime
        # helper to detect the new class and deal with it accordingly
        if self.mutable:
            return self.handle_mutable_string()
        if self.inline and self.explicit and \
                not self.unicode and not self.special:
            return self.handle_inline_explicit()
        if self.unicode:
            return self.handle_unicode_string()
        if self.special:
            return self.handle_special()
        if self.inline:
            return self.handle_UTF8_inline()
        return self.handle_UTF8_not_inline()

    def get_child_index(self, name):
        """Return the index of the child called ``name``.

        "content" is always the last child (``num_children() - 1``); unknown
        names yield None.
        """
        logger = lldb.formatters.Logger.Logger()
        logger >> "Querying for child ['" + str(name) + "']"
        if name == "content":
            return self.num_children() - 1
        flag_indexes = {
            "mutable": 0,
            "inline": 1,
            "explicit": 2,
            "unicode": 3,
            "special": 4,
        }
        return flag_indexes.get(name)

    # ------------------------------------------------------------------
    # layout helpers
    # ------------------------------------------------------------------

    # CFRuntimeBase is defined as having an additional
    # 4 bytes (padding?) on LP64 architectures
    # to get its size we add up sizeof(pointer)+4
    # and then add 4 more bytes if we are on a 64bit system
    def size_of_cfruntime_base(self):
        """Return the size in bytes used for the CFRuntimeBase header."""
        logger = lldb.formatters.Logger.Logger()
        return self.pointer_size + 4 + (4 if self.is_64_bit else 0)

    # the info bits are part of the CFRuntimeBase structure
    # to get at them we have to skip a uintptr_t and then get
    # at the least-significant byte of a 4 byte array. If we are
    # on big-endian this means going to byte 3, if we are on
    # little endian (OSX & iOS), this means reading byte 0
    def offset_of_info_bits(self):
        """Return the byte offset of the info bits inside the object."""
        logger = lldb.formatters.Logger.Logger()
        offset = self.pointer_size
        if not self.is_little:
            offset = offset + 3
        return offset

    def read_info_bits(self):
        """Read the info byte and record whether the object is readable.

        Sets ``self.invalid`` and returns the info bits as an int, or None
        when the byte is unavailable or cannot be parsed.
        """
        logger = lldb.formatters.Logger.Logger()
        cfinfo = self.valobj.CreateChildAtOffset(
            "cfinfo",
            self.offset_of_info_bits(),
            self.valobj.GetType().GetBasicType(
                lldb.eBasicTypeChar))
        # hexadecimal formatting lets int(..., 0) parse the value below
        cfinfo.SetFormat(lldb.eFormatHex)
        info = cfinfo.GetValue()
        if info is None:
            self.invalid = True
            return None
        try:
            bits = int(info, 0)
        except ValueError:
            self.invalid = True
            return None
        self.invalid = False
        return bits

    # ------------------------------------------------------------------
    # calculating internal flag bits of the CFString object
    # this stuff is defined and discussed in CFString.c
    # ------------------------------------------------------------------

    def is_mutable(self):
        """True when bit 0 of the info bits is set."""
        logger = lldb.formatters.Logger.Logger()
        return (self.info_bits & 1) == 1

    def is_inline(self):
        """True when both bits of mask 0x60 are clear."""
        logger = lldb.formatters.Logger.Logger()
        return (self.info_bits & 0x60) == 0

    # this flag's name is ambiguous, it turns out
    # we must skip a length byte to get at the data
    # when this flag is False
    def has_explicit_length(self):
        """False only when bit 2 is set and bit 0 is clear."""
        logger = lldb.formatters.Logger.Logger()
        return (self.info_bits & (1 | 4)) != 4

    # probably a subclass of NSString. obtained this from [str pathExtension]
    # here info_bits = 0 and Unicode data at the start of the padding word
    # in the long run using the isa value might be safer as a way to identify
    # this instead of reading the info_bits
    def is_special_case(self):
        """True when the info bits are all zero."""
        logger = lldb.formatters.Logger.Logger()
        return self.info_bits == 0

    def is_unicode(self):
        """True when bit 0x10 of the info bits is set."""
        logger = lldb.formatters.Logger.Logger()
        return (self.info_bits & 0x10) == 0x10

    # preparing ourselves to read into memory
    # by adjusting architecture-specific info
    def adjust_for_architecture(self):
        """Cache pointer size, 64-bit-ness and byte order of the process."""
        logger = lldb.formatters.Logger.Logger()
        process = self.valobj.GetTarget().GetProcess()
        self.pointer_size = process.GetAddressByteSize()
        self.is_64_bit = self.pointer_size == 8
        self.is_little = process.GetByteOrder() == lldb.eByteOrderLittle

    # reading info bits out of the CFString and computing
    # useful values to get at the real data
    def compute_flags(self):
        """Read the info bits and derive the five boolean flags from them.

        Leaves the flags unset when the info bits cannot be read.
        """
        logger = lldb.formatters.Logger.Logger()
        self.info_bits = self.read_info_bits()
        if self.info_bits is None:
            return
        self.mutable = self.is_mutable()
        self.inline = self.is_inline()
        self.explicit = self.has_explicit_length()
        self.unicode = self.is_unicode()
        self.special = self.is_special_case()

    def update(self):
        """Refresh the cached architecture info and flags."""
        logger = lldb.formatters.Logger.Logger()
        self.adjust_for_architecture()
        self.compute_flags()
356