"""
LLDB AppKit formatters

Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
See https://llvm.org/LICENSE.txt for license information.
SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
"""
# example synthetic children and summary provider for CFString (and related NSString class)
# the real code is part of the LLDB core
import lldb
import lldb.runtime.objc.objc_runtime
import lldb.formatters.Logger

# Python 2/3 compatibility shim: Python 3 has no unichr(), chr() covers it.
try:
    unichr
except NameError:
    unichr = chr

# Names of the debugging-only flag children, in child-index order.
# The "content" child always comes right after them.
_FLAG_CHILD_NAMES = ("mutable", "inline", "explicit", "unicode", "special")

_NOT_NSSTRING = '<variable is not NSString>'
_NOT_NSATTRIBUTEDSTRING = '<variable is not NSAttributedString>'


def _native_str(text):
    """Return text as the interpreter's native ``str`` type.

    On Python 3 the text is already a ``str`` and is returned unchanged.
    On Python 2 a ``unicode`` object is encoded to a UTF-8 byte ``str``.
    Keeping everything in the native type lets the result be concatenated
    with ordinary string literals on both interpreters.
    """
    if isinstance(text, str):
        return text
    return text.encode('utf-8')


def _c_string_literal(text):
    """Return text wrapped in double quotes and escaped as a C string literal.

    Backslashes, double quotes and line breaks are escaped so that the text
    can be embedded in an expression without terminating the literal early.
    """
    escaped = (text.replace('\\', '\\\\')
                   .replace('"', '\\"')
                   .replace('\n', '\\n')
                   .replace('\r', '\\r'))
    return '"' + escaped + '"'


def _content_summary(provider):
    """Return the display text of the provider's "content" child, or None.

    The content child is either an lldb.SBValue (its summary is used) or a
    plain Python string (it gets wrapped in double quotes).
    """
    content = provider.get_child_at_index(provider.get_child_index("content"))
    if content is None:
        return None
    if isinstance(content, lldb.SBValue):
        return content.GetSummary()
    return '"' + content + '"'


def CFString_SummaryProvider(valobj, dict):
    """Summary provider for NSString / CFStringRef / CFMutableStringRef.

    Returns '@' followed by the string contents, '@<variable is not
    NSString>' when the contents cannot be obtained, or '' when the info
    bits of the object cannot be read at all.
    """
    logger = lldb.formatters.Logger.Logger()
    provider = CFStringSynthProvider(valobj, dict)
    if provider.invalid:
        return ''
    try:
        summary = _content_summary(provider)
    except Exception:
        # best effort: a formatter failure must only degrade the summary
        summary = None
    if summary is None:
        summary = _NOT_NSSTRING
    return '@' + summary


def CFAttributedString_SummaryProvider(valobj, dict):
    """Summary provider for NSAttributedString.

    Reads the value stored one pointer past the object's address, treats it
    as the backing string and formats that with CFStringSynthProvider.
    Returns '@' followed by the contents, or '@<variable is not
    NSAttributedString>' when anything along the way fails.
    """
    logger = lldb.formatters.Logger.Logger()
    offset = valobj.GetTarget().GetProcess().GetAddressByteSize()
    pointee = valobj.GetValueAsUnsigned(0)
    summary = _NOT_NSATTRIBUTEDSTRING
    if pointee:
        pointee = pointee + offset
        child_ptr = valobj.CreateValueFromAddress(
            "string_ptr", pointee, valobj.GetType())
        child = child_ptr.CreateValueFromAddress(
            "string_data",
            child_ptr.GetValueAsUnsigned(),
            valobj.GetType()).AddressOf()
        provider = CFStringSynthProvider(child, dict)
        if not provider.invalid:
            try:
                summary = _content_summary(provider)
            except Exception:
                summary = _NOT_NSATTRIBUTEDSTRING
    if summary is None:
        summary = _NOT_NSATTRIBUTEDSTRING
    return '@' + summary


def __lldb_init_module(debugger, dict):
    """Register both summary providers with the debugger."""
    debugger.HandleCommand(
        "type summary add -F CFString.CFString_SummaryProvider NSString CFStringRef CFMutableStringRef")
    debugger.HandleCommand(
        "type summary add -F CFString.CFAttributedString_SummaryProvider NSAttributedString")


class CFStringSynthProvider:
    """Synthetic children provider for CFString-backed objects.

    Exposes six children: five flags decoded from the CFRuntimeBase info
    bits ("mutable", "inline", "explicit", "unicode", "special") followed by
    "content", the actual string data.

    Every method constructs a Logger first, as the original code does;
    presumably so the formatter logger can trace calls -- TODO confirm
    against lldb.formatters.Logger before removing any of them.
    """

    def __init__(self, valobj, dict):
        logger = lldb.formatters.Logger.Logger()
        self.valobj = valobj
        self.update()

    # children other than "content" are for debugging only and must not be
    # used in production code
    def num_children(self):
        """Return 0 for an invalid object, otherwise 6."""
        logger = lldb.formatters.Logger.Logger()
        if self.invalid:
            return 0
        return len(_FLAG_CHILD_NAMES) + 1

    def read_unicode(self, pointer, max_len=2048):
        """Read a NUL-terminated UTF-16 string from process memory.

        pointer is the address of the first code unit. At most max_len
        16-bit code units are read (not max_len bytes). Returns the decoded
        text as a unicode string; surrogate pairs are combined and malformed
        sequences are replaced with U+FFFD.

        Raises RuntimeError if the very first read fails. A failure after
        some data was read just ends the string at that point.
        """
        logger = lldb.formatters.Logger.Logger()
        process = self.valobj.GetTarget().GetProcess()
        error = lldb.SBError()
        raw = bytearray()
        # cannot do the read at once because the length value has
        # a weird encoding. better play it safe here
        while max_len > 0:
            content = process.ReadMemory(pointer, 2, error)
            if error.Fail() or content is None or len(content) < 2:
                if not raw:
                    raise RuntimeError(
                        "unable to read UTF-16 data at 0x%x" % pointer)
                break
            unit = bytearray(content)
            if unit[0] == 0 and unit[1] == 0:
                break
            raw += unit[:2]
            pointer = pointer + 2
            # read max_len unicode values, not max_len bytes
            max_len = max_len - 1
        # decode depending on endianness
        # (do we really need this or is Cocoa going to
        # use Windows-compatible little-endian even
        # if the target is big endian?)
        codec = 'utf-16-le' if self.is_little else 'utf-16-be'
        return raw.decode(codec, 'replace')

    # handle the special case strings
    # only use the custom code for the tested LP64 case
    def handle_special(self):
        """Read the "special" layout: UTF-16 data 12 bytes into the object."""
        logger = lldb.formatters.Logger.Logger()
        if not self.is_64_bit:
            # for 32bit targets, use safe ObjC code
            return self.handle_unicode_string_safe()
        offset = 12
        pointer = self.valobj.GetValueAsUnsigned(0) + offset
        text = _native_str(self.read_unicode(pointer))
        return self.valobj.CreateValueFromExpression(
            "content", "(char*)" + _c_string_literal(text))

    # last resort call, use ObjC code to read; the final aim is to
    # be able to strip this call away entirely and only do the read
    # ourselves
    def handle_unicode_string_safe(self):
        """Build the content from the object description.

        Returns None when the object has no description.
        """
        description = self.valobj.GetObjectDescription()
        if description is None:
            return None
        return self.valobj.CreateValueFromExpression(
            "content", "(char*)" + _c_string_literal(description))

    def handle_unicode_string(self):
        """Read a UTF-16 string, stored inline or behind a pointer.

        Returns the text as a native str. The safe path and the error path
        return an lldb.SBValue instead.
        """
        logger = lldb.formatters.Logger.Logger()
        # step 1: find offset
        pointer = self.valobj.GetValueAsUnsigned(
            0) + self.size_of_cfruntime_base()
        if self.inline:
            if not self.explicit:
                # untested, use the safe code path
                return self.handle_unicode_string_safe()
            # a full pointer is skipped here before getting to the live
            # data
            pointer = pointer + self.pointer_size
        else:
            # read a pointer-sized value here and make an address out of it
            try:
                char_type = self.valobj.GetType().GetBasicType(
                    lldb.eBasicTypeChar).GetPointerType()
                vopointer = self.valobj.CreateValueFromAddress(
                    "dummy", pointer, char_type)
                pointer = vopointer.GetValueAsUnsigned(0)
            except Exception:
                # the inner quotes must reach the expression parser escaped
                return self.valobj.CreateValueFromExpression(
                    "content", '(char*)"@\\"invalid NSString\\""')
        # step 2: read Unicode data at pointer
        pystr = self.read_unicode(pointer)
        # step 3: return it
        return _native_str(pystr)

    def handle_inline_explicit(self):
        """Point at the bytes stored three pointers into the object."""
        logger = lldb.formatters.Logger.Logger()
        offset = 3 * self.pointer_size
        offset = offset + self.valobj.GetValueAsUnsigned(0)
        return self.valobj.CreateValueFromExpression(
            "content", "(char*)(" + str(offset) + ")")

    def handle_mutable_string(self):
        """Read a mutable string through the pointer two pointers in."""
        logger = lldb.formatters.Logger.Logger()
        offset = 2 * self.pointer_size
        data = self.valobj.CreateChildAtOffset(
            "content", offset, self.valobj.GetType().GetBasicType(
                lldb.eBasicTypeChar).GetPointerType())
        data_value = data.GetValueAsUnsigned(0)
        if self.explicit and self.unicode:
            return _native_str(self.read_unicode(data_value))
        # otherwise one byte is skipped before the character data
        data_value = data_value + 1
        return self.valobj.CreateValueFromExpression(
            "content", "(char*)(" + str(data_value) + ")")

    def handle_UTF8_inline(self):
        """Point at 8-bit data stored right after the CFRuntimeBase."""
        logger = lldb.formatters.Logger.Logger()
        offset = self.valobj.GetValueAsUnsigned(
            0) + self.size_of_cfruntime_base()
        if not self.explicit:
            # skip the length byte (see has_explicit_length)
            offset = offset + 1
        return self.valobj.CreateValueFromAddress(
            "content", offset, self.valobj.GetType().GetBasicType(
                lldb.eBasicTypeChar)).AddressOf()

    def handle_UTF8_not_inline(self):
        """Return the char pointer stored right after the CFRuntimeBase."""
        logger = lldb.formatters.Logger.Logger()
        offset = self.size_of_cfruntime_base()
        return self.valobj.CreateChildAtOffset(
            "content", offset, self.valobj.GetType().GetBasicType(
                lldb.eBasicTypeChar).GetPointerType())

    def get_child_at_index(self, index):
        """Return the synthetic child at index.

        Indexes 0-4 are the flag children, index 5 is "content" (an
        lldb.SBValue or a native str). Returns None for any other index and
        for an invalid object.
        """
        logger = lldb.formatters.Logger.Logger()
        logger >> "Querying for child [" + str(index) + "]"
        if self.invalid:
            # the flag attributes were never computed for an invalid object
            return None
        if 0 <= index < len(_FLAG_CHILD_NAMES):
            name = _FLAG_CHILD_NAMES[index]
            return self.valobj.CreateValueFromExpression(
                name, str(int(getattr(self, name))))
        if index == len(_FLAG_CHILD_NAMES):
            return self._read_content()
        return None

    def _read_content(self):
        """Dispatch to the reader matching this object's flag combination."""
        # we are handling the several possible combinations of flags.
        # for each known combination we have a function that knows how to
        # go fetch the data from memory instead of running code. if a string is not
        # correctly displayed, one should start by finding a combination of flags that
        # makes it different from these known cases, and provide a new reader function
        # if this is not possible, a new flag might have to be made up (like the "special" flag
        # below, which is not a real flag in CFString), or alternatively one might need to use
        # the ObjC runtime helper to detect the new class and deal with it accordingly
        if self.mutable:
            return self.handle_mutable_string()
        if self.inline and self.explicit and \
                not self.unicode and not self.special:
            return self.handle_inline_explicit()
        if self.unicode:
            return self.handle_unicode_string()
        if self.special:
            return self.handle_special()
        if self.inline:
            return self.handle_UTF8_inline()
        return self.handle_UTF8_not_inline()

    def get_child_index(self, name):
        """Return the index of the child called name, or None if unknown.

        "content" is always the last child, i.e. num_children() - 1.
        """
        logger = lldb.formatters.Logger.Logger()
        logger >> "Querying for child ['" + str(name) + "']"
        if name == "content":
            return self.num_children() - 1
        if name in _FLAG_CHILD_NAMES:
            return _FLAG_CHILD_NAMES.index(name)
        return None

    # CFRuntimeBase is defined as having an additional
    # 4 bytes (padding?) on LP64 architectures
    # to get its size we add up sizeof(pointer)+4
    # and then add 4 more bytes if we are on a 64bit system
    def size_of_cfruntime_base(self):
        """Return the size in bytes of the CFRuntimeBase header."""
        logger = lldb.formatters.Logger.Logger()
        return self.pointer_size + 4 + (4 if self.is_64_bit else 0)

    # the info bits are part of the CFRuntimeBase structure
    # to get at them we have to skip a uintptr_t and then get
    # at the least-significant byte of a 4 byte array. If we are
    # on big-endian this means going to byte 3, if we are on
    # little endian (OSX & iOS), this means reading byte 0
    def offset_of_info_bits(self):
        """Return the byte offset of the info bits within the object."""
        logger = lldb.formatters.Logger.Logger()
        offset = self.pointer_size
        if not self.is_little:
            offset = offset + 3
        return offset

    def read_info_bits(self):
        """Read the info byte and record validity in self.invalid.

        Returns the byte as an int, or None (with self.invalid set to True)
        when the value cannot be read.
        """
        logger = lldb.formatters.Logger.Logger()
        cfinfo = self.valobj.CreateChildAtOffset(
            "cfinfo",
            self.offset_of_info_bits(),
            self.valobj.GetType().GetBasicType(
                lldb.eBasicTypeChar))
        # NOTE(review): 11 is presumably lldb.eFormatHex; int(info, 0) below
        # relies on the value being printed as a prefixed literal -- confirm
        cfinfo.SetFormat(11)
        info = cfinfo.GetValue()
        if info is None:
            self.invalid = True
            return None
        self.invalid = False
        return int(info, 0)

    # calculating internal flag bits of the CFString object
    # this stuff is defined and discussed in CFString.c
    def is_mutable(self):
        logger = lldb.formatters.Logger.Logger()
        return (self.info_bits & 1) == 1

    def is_inline(self):
        logger = lldb.formatters.Logger.Logger()
        return (self.info_bits & 0x60) == 0

    # this flag's name is ambiguous, it turns out
    # we must skip a length byte to get at the data
    # when this flag is False
    def has_explicit_length(self):
        logger = lldb.formatters.Logger.Logger()
        return (self.info_bits & (1 | 4)) != 4

    # probably a subclass of NSString. obtained this from [str pathExtension]
    # here info_bits = 0 and Unicode data at the start of the padding word
    # in the long run using the isa value might be safer as a way to identify this
    # instead of reading the info_bits
    def is_special_case(self):
        logger = lldb.formatters.Logger.Logger()
        return self.info_bits == 0

    def is_unicode(self):
        logger = lldb.formatters.Logger.Logger()
        return (self.info_bits & 0x10) == 0x10

    # preparing ourselves to read into memory
    # by adjusting architecture-specific info
    def adjust_for_architecture(self):
        """Cache the pointer size, 64-bit-ness and byte order of the process."""
        logger = lldb.formatters.Logger.Logger()
        process = self.valobj.GetTarget().GetProcess()
        self.pointer_size = process.GetAddressByteSize()
        self.is_64_bit = self.pointer_size == 8
        self.is_little = process.GetByteOrder() == lldb.eByteOrderLittle

    # reading info bits out of the CFString and computing
    # useful values to get at the real data
    def compute_flags(self):
        """Decode the info bits into the five flag attributes.

        Leaves the flags unset when the info bits cannot be read; in that
        case self.invalid is True.
        """
        logger = lldb.formatters.Logger.Logger()
        self.info_bits = self.read_info_bits()
        if self.info_bits is None:
            return
        self.mutable = self.is_mutable()
        self.inline = self.is_inline()
        self.explicit = self.has_explicit_length()
        self.unicode = self.is_unicode()
        self.special = self.is_special_case()

    def update(self):
        """Refresh the cached architecture info and flags."""
        logger = lldb.formatters.Logger.Logger()
        self.adjust_for_architecture()
        self.compute_flags()