#!/usr/bin/env python3
#
# Copyright (C) 2013 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Module for looking up symbolic debugging information.

The information can include symbol names, offsets, and source locations.
"""

import atexit
import glob
import os
import platform
import re
import shutil
import signal
import subprocess
import unittest

ANDROID_BUILD_TOP = os.environ.get("ANDROID_BUILD_TOP", ".")


def FindClangDir():
    """Return the prebuilt clang directory for this tree, or None.

    The clang version is obtained by running
    build/soong/scripts/get_clang_version.py under ANDROID_BUILD_TOP.

    Returns:
      The path of the matching prebuilts/clang/host/linux-x86 directory, or
      None when the version script does not exist.

    Raises:
      subprocess.CalledProcessError: the version script exists but failed.
    """
    get_clang_version = ANDROID_BUILD_TOP + "/build/soong/scripts/get_clang_version.py"
    if not os.path.exists(get_clang_version):
        return None
    # We want the script to fail if get_clang_version.py exists but is unable
    # to find the clang version.
    version_output = subprocess.check_output(get_clang_version, text=True)
    return ANDROID_BUILD_TOP + "/prebuilts/clang/host/linux-x86/" + version_output.strip()


def FindSymbolsDir():
    """Return the TARGET_OUT_UNSTRIPPED directory reported by soong_ui.bash.

    The command is run through the shell with ANDROID_BUILD_TOP as its working
    directory. The working directory of the calling process is left untouched
    and the child process is reaped before returning.

    Returns:
      The stripped standard output of the command as a string. This is empty
      when the command printed nothing (for example outside an Android tree).
    """
    cmd = "build/soong/soong_ui.bash --dumpvar-mode --abs TARGET_OUT_UNSTRIPPED"
    with subprocess.Popen(cmd,
                          cwd=ANDROID_BUILD_TOP,
                          stdout=subprocess.PIPE,
                          universal_newlines=True,
                          shell=True) as proc:
        return str(proc.stdout.read().strip())


SYMBOLS_DIR = FindSymbolsDir()

ARCH_IS_32BIT = None

VERBOSE = False

# These are private. Do not access them from other modules.
_CACHED_TOOLCHAIN = None
_CACHED_CXX_FILT = None

# Caches for symbolized information.
_SYMBOL_INFORMATION_ADDR2LINE_CACHE = {}
_SYMBOL_INFORMATION_OBJDUMP_CACHE = {}
_SYMBOL_DEMANGLING_CACHE = {}


# Caches for pipes to subprocesses.

class ProcessCache:
    """Least-recently-used cache of helper subprocesses keyed by command line.

    Each instance owns its own bookkeeping, so separate caches never see each
    other's processes.
    """

    # Max number of open pipes.
    _PIPE_MAX_OPEN = 10

    def __init__(self):
        # Maps a command tuple to its running subprocess.Popen object.
        self._cmd2pipe = {}
        # (command tuple, Popen) pairs, most recently used first.
        self._lru = []

    def GetProcess(self, cmd):
        """Return the process running cmd, spawning it if it is not cached.

        Args:
          cmd: the command line as a list of strings.

        Returns:
          The subprocess.Popen object for that command.
        """
        cmd_tuple = tuple(cmd)  # Need to use a tuple as lists can't be dict keys.
        # Pipe already available?
        if cmd_tuple in self._cmd2pipe:
            pipe = self._cmd2pipe[cmd_tuple]
            # Update LRU.
            self._lru = [(cmd_tuple, pipe)] + [i for i in self._lru if i[0] != cmd_tuple]
            return pipe

        # Not cached, yet. Open a new one.

        # Check if too many are open, close the old ones.
        while len(self._lru) >= self._PIPE_MAX_OPEN:
            open_cmd, open_pipe = self._lru.pop()
            del self._cmd2pipe[open_cmd]
            self.TerminateProcess(open_pipe)

        # Create and put into cache.
        pipe = self.SpawnProcess(cmd)
        self._cmd2pipe[cmd_tuple] = pipe
        self._lru = [(cmd_tuple, pipe)] + self._lru
        return pipe

    def SpawnProcess(self, cmd):
        """Start cmd with text-mode pipes attached to its stdin and stdout."""
        return subprocess.Popen(cmd,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                universal_newlines=True)

    def TerminateProcess(self, pipe):
        """Close both pipes of the process, terminate it and wait for it."""
        pipe.stdin.close()
        pipe.stdout.close()
        pipe.terminate()
        pipe.wait()

    def KillAllProcesses(self):
        """Terminate every cached process and empty the cache."""
        for _, open_pipe in self._lru:
            self.TerminateProcess(open_pipe)
        # Reset the instance state so that a later GetProcess() spawns a fresh
        # process instead of handing out one that was just terminated.
        self._cmd2pipe = {}
        self._lru = []


_PIPE_ADDR2LINE_CACHE = ProcessCache()
_PIPE_CPPFILT_CACHE = ProcessCache()


# Process cache cleanup on shutdown.

def CloseAllPipes():
    """Terminate all cached symbolizer and demangler processes."""
    _PIPE_ADDR2LINE_CACHE.KillAllProcesses()
    _PIPE_CPPFILT_CACHE.KillAllProcesses()


atexit.register(CloseAllPipes)


def PipeTermHandler(signum, frame):
    """Signal handler: close all cached pipes, then exit at once with status 0."""
    CloseAllPipes()
    os._exit(0)


for sig in (signal.SIGABRT, signal.SIGINT, signal.SIGTERM):
    signal.signal(sig, PipeTermHandler)


def ToolPath(tool, toolchain=None):
    """Return a fully-qualified path to the specified tool, or just the tool if it's on PATH."""
    if shutil.which(tool):
        return tool
    if not toolchain:
        toolchain = FindToolchain()
    return os.path.join(toolchain, tool)


def FindToolchain():
    """Return the llvm-binutils-stable directory under ANDROID_BUILD_TOP.

    The result is remembered after the first successful lookup, and the chosen
    directory is printed once.

    Raises:
      Exception: the directory does not exist.
    """
    global _CACHED_TOOLCHAIN
    if _CACHED_TOOLCHAIN:
        return _CACHED_TOOLCHAIN

    llvm_binutils_dir = ANDROID_BUILD_TOP + "/prebuilts/clang/host/linux-x86/llvm-binutils-stable/"
    if not os.path.exists(llvm_binutils_dir):
        raise Exception("Could not find llvm tool chain directory %s" % (llvm_binutils_dir))

    _CACHED_TOOLCHAIN = llvm_binutils_dir
    print("Using toolchain from:", _CACHED_TOOLCHAIN)
    return _CACHED_TOOLCHAIN


def SymbolInformation(lib, addr):
    """Look up symbol information about an address.

    Args:
      lib: library (or executable) pathname containing symbols
      addr: string hexadecimal address

    Returns:
      A list of the form [(source_symbol, source_location,
      object_symbol_with_offset)].

      If the function has been inlined then the list may contain
      more than one element with the symbols for the most deeply
      nested inlined location appearing first.  The list is
      always non-empty, even if no information is available.

      Usually you want to display the source_location and
      object_symbol_with_offset from the last element in the list.
    """
    info = SymbolInformationForSet(lib, {addr})
    return (info and info.get(addr)) or [(None, None, None)]


def SymbolInformationForSet(lib, unique_addrs):
    """Look up symbol information for a set of addresses from the given library.

    Args:
      lib: library (or executable) pathname containing symbols
      unique_addrs: set of hexadecimal addresses

    Returns:
      A dictionary of the form {addr: [(source_symbol, source_location,
      object_symbol_with_offset)]} where each address has a list of
      associated symbols and locations.  The list is always non-empty.
      None is returned when lib is empty or when either the symbolizer or
      the objdump lookup yields nothing.

      If the function has been inlined then the list may contain
      more than one element with the symbols for the most deeply
      nested inlined location appearing first.  The list is
      always non-empty, even if no information is available.

      Usually you want to display the source_location and
      object_symbol_with_offset from the last element in the list.
    """
    if not lib:
        return None

    addr_to_line = CallLlvmSymbolizerForSet(lib, unique_addrs)
    if not addr_to_line:
        return None

    addr_to_objdump = CallObjdumpForSet(lib, unique_addrs)
    if not addr_to_objdump:
        return None

    result = {}
    for addr in unique_addrs:
        source_info = addr_to_line.get(addr)
        if not source_info:
            source_info = [(None, None)]
        if addr in addr_to_objdump:
            object_symbol, object_offset = addr_to_objdump[addr]
            object_symbol_with_offset = FormatSymbolWithOffset(object_symbol,
                                                               object_offset)
        else:
            object_symbol_with_offset = None
        result[addr] = [(source_symbol, source_location, object_symbol_with_offset)
                        for (source_symbol, source_location) in source_info]

    return result


def _TakeCachedAddrs(addr_cache, unique_addrs, result):
    """Copy cached entries into result and return the addresses still missing.

    Args:
      addr_cache: dictionary of previously resolved addresses for one library.
      unique_addrs: iterable of string addresses being looked up.
      result: dictionary that receives every entry found in addr_cache.

    Returns:
      The addresses not present in addr_cache, as a sorted list of strings.
    """
    pending = []
    for addr in sorted(unique_addrs):
        if addr in addr_cache:
            result[addr] = addr_cache[addr]
        else:
            pending.append(addr)
    return pending


def _FindSymbolsFile(lib):
    """Return SYMBOLS_DIR + lib if it exists, else lib if it exists, else None."""
    for candidate in (SYMBOLS_DIR + lib, lib):
        if os.path.exists(candidate):
            return candidate
    return None


def CallLlvmSymbolizerForSet(lib, unique_addrs):
    """Look up line and symbol information for a set of addresses.

    Args:
      lib: library (or executable) pathname containing symbols
      unique_addrs: set of string hexadecimal addresses to look up.

    Returns:
      A dictionary of the form {addr: [(symbol, file:line)]} where
      each address has a list of associated symbols and locations
      or an empty list if no symbol information was found.
      None is returned when lib is empty or no symbol file can be found.

      If the function has been inlined then the list may contain
      more than one element with the symbols for the most deeply
      nested inlined location appearing first.
    """
    if not lib:
        return None

    result = {}
    addr_cache = _SYMBOL_INFORMATION_ADDR2LINE_CACHE.get(lib)
    lib_was_cached = addr_cache is not None
    if not lib_was_cached:
        addr_cache = {}
        _SYMBOL_INFORMATION_ADDR2LINE_CACHE[lib] = addr_cache

    # Go through and handle all known addresses; the rest need symbolizing.
    addrs = _TakeCachedAddrs(addr_cache, unique_addrs, result)
    if lib_was_cached and not addrs:
        # Everything was cached, we're done.
        return result

    symbols = _FindSymbolsFile(lib)
    if symbols is None:
        return None

    # Make sure the symbols path is not a directory.
    if os.path.isdir(symbols):
        return None

    cmd = [ToolPath("llvm-symbolizer"), "--functions", "--inlines",
           "--demangle", "--obj=" + symbols, "--output-style=GNU"]
    child = _PIPE_ADDR2LINE_CACHE.GetProcess(cmd)

    for addr in addrs:
        try:
            child.stdin.write("0x%s\n" % addr)
            child.stdin.flush()
            records = []
            first = True
            while True:
                symbol = child.stdout.readline().strip()
                if not symbol:
                    break
                location = child.stdout.readline().strip()
                records.append((symbol, location))
                if first:
                    # Write a blank line as a sentinel so we know when to stop
                    # reading inlines from the output.
                    # The blank line will cause llvm-symbolizer to emit a blank line.
                    child.stdin.write("\n")
                    child.stdin.flush()
                    first = False
        except IOError as e:
            # Remove the / in front of the library name to match other output.
            records = [(None, lib[1:] + " ***Error: " + str(e))]
        result[addr] = records
        addr_cache[addr] = records
    return result


def CallObjdumpForSet(lib, unique_addrs):
    """Use objdump to find out the names of the containing functions.

    Args:
      lib: library (or executable) pathname containing symbols
      unique_addrs: set of string hexadecimal addresses to find the functions for.

    Returns:
      A dictionary of the form {addr: (string symbol, offset)}, or None when
      lib is empty or no symbol file can be found. Addresses that do not
      coincide with the start of a disassembled instruction are left out.
    """
    if not lib:
        return None

    result = {}
    addr_cache = _SYMBOL_INFORMATION_OBJDUMP_CACHE.get(lib)
    lib_was_cached = addr_cache is not None
    if not lib_was_cached:
        addr_cache = {}
        _SYMBOL_INFORMATION_OBJDUMP_CACHE[lib] = addr_cache

    # Go through and handle all known addresses; the rest need a disassembly.
    addrs = _TakeCachedAddrs(addr_cache, unique_addrs, result)
    if lib_was_cached and not addrs:
        # Everything was cached, we're done.
        return result

    symbols = _FindSymbolsFile(lib)
    if symbols is None:
        return None

    if not addrs:
        # Nothing to disassemble.
        return result

    # The disassembly is scanned in ascending address order, so the targets
    # must be ordered by numeric value. A plain string sort gets this wrong
    # when the hex strings differ in length ("10" sorts before "9").
    addrs.sort(key=lambda a: int(a, 16))
    targets = [int(a, 16) for a in addrs]

    start_addr_dec = str(targets[0])
    stop_addr_dec = str(targets[-1] + 8)
    cmd = [ToolPath("llvm-objdump"),
           "--section=.text",
           "--demangle",
           "--disassemble",
           "--start-address=" + start_addr_dec,
           "--stop-address=" + stop_addr_dec,
           symbols]

    # Function lines look like:
    #   000177b0 <android::IBinder::~IBinder()+0x2c>:
    # We pull out the address and function first. Then we check for an optional
    # offset. This is tricky due to functions that look like "operator+(..)+0x2c"
    func_regexp = re.compile(r"(^[a-f0-9]*) \<(.*)\>:$")
    offset_regexp = re.compile(r"(.*)\+0x([a-f0-9]*)")

    # A disassembly line looks like:
    #   177b2:  b510  push  {r4, lr}
    asm_regexp = re.compile(r"(^[ a-f0-9]*):[ a-f0-9]*.*$")

    current_symbol = None    # The current function symbol in the disassembly.
    current_symbol_addr = 0  # The address of the current function.
    addr_index = 0           # The address that we are currently looking for.

    # The context manager closes the pipe and reaps the objdump process, also
    # when the loop is left early.
    with subprocess.Popen(cmd, stdout=subprocess.PIPE, universal_newlines=True) as proc:
        for line in proc.stdout:
            # Is it a function line like:
            #   000177b0 <android::IBinder::~IBinder()>:
            components = func_regexp.match(line)
            if components:
                # This is a new function, so record the current function and its address.
                current_symbol_addr = int(components.group(1), 16)
                current_symbol = components.group(2)

                # Does it have an optional offset like: "foo(..)+0x2c"?
                components = offset_regexp.match(current_symbol)
                if components:
                    current_symbol = components.group(1)
                    offset = components.group(2)
                    if offset:
                        current_symbol_addr -= int(offset, 16)

            # Is it a disassembly line like:
            #   177b2:  b510  push  {r4, lr}
            components = asm_regexp.match(line)
            if components:
                i_addr = int(components.group(1), 16)
                # Consume every target at or below this instruction address.
                # Targets the disassembly has already gone past can no longer
                # match; skipping them keeps later targets resolvable.
                while addr_index < len(addrs) and targets[addr_index] <= i_addr:
                    if targets[addr_index] == i_addr:
                        target_addr = addrs[addr_index]
                        result[target_addr] = (current_symbol, i_addr - current_symbol_addr)
                        addr_cache[target_addr] = result[target_addr]
                    addr_index += 1
                if addr_index >= len(addrs):
                    break

    return result


def CallCppFilt(mangled_symbol):
    """Demangle a symbol with llvm-cxxfilt.

    The location of llvm-cxxfilt is determined on first use and remembered.
    Results are cached per mangled symbol.

    Args:
      mangled_symbol: the symbol to demangle.

    Returns:
      The stripped line that llvm-cxxfilt printed for the symbol.

    Raises:
      Exception: llvm-cxxfilt could not be located, or more than one
        candidate was found.
    """
    if mangled_symbol in _SYMBOL_DEMANGLING_CACHE:
        return _SYMBOL_DEMANGLING_CACHE[mangled_symbol]

    global _CACHED_CXX_FILT
    if not _CACHED_CXX_FILT:
        clang_dir = FindClangDir()
        if clang_dir:
            cxxfilt = clang_dir + "/bin/llvm-cxxfilt"
            if not os.path.exists(cxxfilt):
                raise Exception("bin/llvm-cxxfilt missing from " + clang_dir)
            toolchains = [cxxfilt]
        else:
            # When run in CI, we don't have a way to find the clang version. But
            # llvm-cxxfilt should be available in the following relative path.
            toolchains = glob.glob("./clang-r*/bin/llvm-cxxfilt")
            if toolchains and len(toolchains) != 1:
                raise Exception("Expected one llvm-cxxfilt but found many: " +
                                ", ".join(toolchains))
        if not toolchains:
            raise Exception("Could not find llvm-cxxfilt tool")
        _CACHED_CXX_FILT = sorted(toolchains)[-1]

    cmd = [_CACHED_CXX_FILT]
    process = _PIPE_CPPFILT_CACHE.GetProcess(cmd)
    process.stdin.write(mangled_symbol)
    process.stdin.write("\n")
    process.stdin.flush()

    demangled_symbol = process.stdout.readline().strip()

    _SYMBOL_DEMANGLING_CACHE[mangled_symbol] = demangled_symbol

    return demangled_symbol


def FormatSymbolWithOffset(symbol, offset):
    """Return "symbol+offset" with a decimal offset, or just symbol when offset is 0."""
    if offset == 0:
        return symbol
    return "%s+%d" % (symbol, offset)


# Operators whose spelling contains '<' or '>' and the single-character
# stand-ins used while bracket pairs are being matched.
_OPERATOR_PLACEHOLDERS = (
    ("operator<<", "operator\u00AB"),
    ("operator>>", "operator\u00BB"),
    ("operator->", "operator\u2192"),
)

# Closing bracket -> the opening bracket it has to pair with.
_OPENING_BRACKET = {')': '(', '>': '<'}


def FormatSymbolWithoutParameters(symbol):
    """Remove parameters from function.

    Rather than trying to parse the demangled C++ signature,
    it just removes matching top level parentheses.

    Args:
      symbol: demangled symbol, optionally followed by "+offset". May be
        None or empty.

    Returns:
      The symbol with every non-empty top-level "(...)" group and every
      ") const" qualifier removed. The input is returned unchanged when it is
      empty or when its brackets do not pair up.
    """
    if not symbol:
        return symbol

    result = symbol.replace(") const", ")")  # Strip const keyword.
    # Hide operators that would otherwise look like unmatched '<' or '>'.
    for spelling, placeholder in _OPERATOR_PLACEHOLDERS:
        result = result.replace(spelling, placeholder)

    nested = []  # Keeps track of current nesting level of parentheses.
    for i in reversed(range(len(result))):  # Iterate backward to make cutting easier.
        c = result[i]
        if c == ')' or c == '>':
            if len(nested) == 0:
                end = i + 1  # Mark the end of top-level pair.
            nested.append(c)
        if c == '(' or c == '<':
            if len(nested) == 0 or _OPENING_BRACKET[nested.pop()] != c:
                return symbol  # Malformed: character does not match its pair.
            # "()" on its own is kept so that "operator()" survives.
            if len(nested) == 0 and c == '(' and (end - i) > 2:
                result = result[:i] + result[end:]  # Remove substring (i, end).
    if len(nested) > 0:
        return symbol  # Malformed: missing pair.

    # Put the real operator spellings back; the stand-ins are only needed
    # while matching brackets and must not leak into the returned symbol.
    for spelling, placeholder in _OPERATOR_PLACEHOLDERS:
        result = result.replace(placeholder, spelling)

    return result.strip()


def SetBitness(lines):
    """Guess from stack trace lines whether the target is 32 bit; set ARCH_IS_32BIT.

    A regular frame line carrying an 8 or 16 digit hex address decides the
    bitness immediately. An ASAN frame line with an address longer than 8 hex
    digits also decides for 64 bit; a shorter ASAN address only suggests
    32 bit, and later lines may still override it. With no matching line
    ARCH_IS_32BIT is set to False.

    Args:
      lines: iterable of stack trace lines.
    """
    global ARCH_IS_32BIT

    trace_line = re.compile(r"\#[0-9]+[ \t]+..[ \t]+([0-9a-f]{8}|[0-9a-f]{16})([ \t]+|$)")
    asan_trace_line = re.compile(r"\#[0-9]+[ \t]+0x([0-9a-f]+)[ \t]+")

    ARCH_IS_32BIT = False
    for line in lines:
        trace_match = trace_line.search(line)
        if trace_match:
            # Try to guess the arch, we know the bitness.
            if len(trace_match.group(1)) == 16:
                ARCH_IS_32BIT = False
            else:
                ARCH_IS_32BIT = True
            break
        asan_trace_match = asan_trace_line.search(line)
        if asan_trace_match:
            # We might be able to guess the bitness by the length of the address.
            if len(asan_trace_match.group(1)) > 8:
                ARCH_IS_32BIT = False
                # We know for a fact this is 64 bit, so we are done.
                break
            else:
                # This might be 32 bit, or just a small address. Keep going in this
                # case, but if we couldn't figure anything else out, go with 32 bit.
                ARCH_IS_32BIT = True


class FindClangDirTests(unittest.TestCase):
    @unittest.skipIf(ANDROID_BUILD_TOP == '.', 'Test only supported in an Android tree.')
    def test_clang_dir_found(self):
        self.assertIsNotNone(FindClangDir())


class SetBitnessTests(unittest.TestCase):
    def test_32bit_check(self):
        global ARCH_IS_32BIT

        SetBitness(["#00 pc 000374e0"])
        self.assertTrue(ARCH_IS_32BIT)

    def test_64bit_check(self):
        global ARCH_IS_32BIT

        SetBitness(["#00 pc 00000000000374e0"])
        self.assertFalse(ARCH_IS_32BIT)

    def test_32bit_asan_trace_line_toolchain(self):
        global ARCH_IS_32BIT

        SetBitness(["#10 0xb5eeba5d  (/system/vendor/lib/egl/libGLESv1_CM_adreno.so+0xfa5d)"])
        self.assertTrue(ARCH_IS_32BIT)

    def test_64bit_asan_trace_line_toolchain(self):
        global ARCH_IS_32BIT

        SetBitness(["#12 0x5d33bf  (/system/lib/libclang_rt.asan-arm-android.so+0x823bf)",
                    "#12 0x11b35d33bf  (/system/lib/libclang_rt.asan-arm-android.so+0x823bf)"])
        self.assertFalse(ARCH_IS_32BIT)


class FormatSymbolWithoutParametersTests(unittest.TestCase):
    def test_c(self):
        self.assertEqual(FormatSymbolWithoutParameters("foo"), "foo")
        self.assertEqual(FormatSymbolWithoutParameters("foo+42"), "foo+42")

    def test_simple(self):
        self.assertEqual(FormatSymbolWithoutParameters("foo(int i)"), "foo")
        self.assertEqual(FormatSymbolWithoutParameters("foo(int i)+42"), "foo+42")
        self.assertEqual(FormatSymbolWithoutParameters("bar::foo(int i)+42"), "bar::foo+42")
        self.assertEqual(FormatSymbolWithoutParameters("operator()"), "operator()")

    def test_templates(self):
        self.assertEqual(FormatSymbolWithoutParameters("bar::foo<T>(vector<T>& v)"), "bar::foo<T>")
        self.assertEqual(FormatSymbolWithoutParameters("bar<T>::foo(vector<T>& v)"), "bar<T>::foo")
        self.assertEqual(FormatSymbolWithoutParameters("bar::foo<T>(vector<T<U>>& v)"), "bar::foo<T>")
        self.assertEqual(FormatSymbolWithoutParameters("bar::foo<(EnumType)0>(vector<(EnumType)0>& v)"),
                         "bar::foo<(EnumType)0>")

    def test_nested(self):
        self.assertEqual(FormatSymbolWithoutParameters("foo(int i)::bar(int j)"), "foo::bar")

    def test_unbalanced(self):
        self.assertEqual(FormatSymbolWithoutParameters("foo(bar(int i)"), "foo(bar(int i)")
        self.assertEqual(FormatSymbolWithoutParameters("foo)bar(int i)"), "foo)bar(int i)")
        self.assertEqual(FormatSymbolWithoutParameters("foo<bar(int i)"), "foo<bar(int i)")
        self.assertEqual(FormatSymbolWithoutParameters("foo>bar(int i)"), "foo>bar(int i)")

    def test_operators(self):
        self.assertEqual(FormatSymbolWithoutParameters("Foo::operator<<(int)"), "Foo::operator<<")
        self.assertEqual(FormatSymbolWithoutParameters("operator>>(Stream&, Foo&)"), "operator>>")
        self.assertEqual(FormatSymbolWithoutParameters("Foo::operator->() const"), "Foo::operator->()")


if __name__ == '__main__':
    unittest.main(verbosity=2)