#!/usr/bin/env python3
#
# Copyright (C) 2013 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Module for looking up symbolic debugging information.

The information can include symbol names, offsets, and source locations.
"""

import atexit
import json
import glob
import os
import platform
import re
import shutil
import signal
import subprocess
import unittest

# Root of the Android source tree; falls back to the current directory.
ANDROID_BUILD_TOP = os.environ.get("ANDROID_BUILD_TOP", ".")


def FindClangDir():
    """Return the prebuilt clang directory for this tree, or None.

    The clang version is obtained by running
    build/soong/scripts/get_clang_version.py under ANDROID_BUILD_TOP.

    Returns:
      ANDROID_BUILD_TOP + "/prebuilts/clang/host/linux-x86/" + <version>, or
      None when get_clang_version.py does not exist.

    Raises:
      subprocess.CalledProcessError: the script exists but exited non-zero.
    """
    get_clang_version = ANDROID_BUILD_TOP + "/build/soong/scripts/get_clang_version.py"
    if not os.path.exists(get_clang_version):
        return None
    # We want the script to fail if get_clang_version.py exists but is unable
    # to find the clang version.
    version_output = subprocess.check_output(get_clang_version, text=True)
    return ANDROID_BUILD_TOP + "/prebuilts/clang/host/linux-x86/" + version_output.strip()


def FindSymbolsDir():
    """Return the value soong_ui reports for TARGET_OUT_UNSTRIPPED.

    Runs "build/soong/soong_ui.bash --dumpvar-mode --abs TARGET_OUT_UNSTRIPPED"
    through the shell with ANDROID_BUILD_TOP as the child's working directory.

    Returns:
      The command's standard output with surrounding whitespace removed. This
      is the empty string when the command prints nothing (its exit status is
      not checked).
    """
    cmd = "build/soong/soong_ui.bash --dumpvar-mode --abs TARGET_OUT_UNSTRIPPED"
    # subprocess.run() waits for the child, so no unreaped process is left
    # behind, and cwd= confines the directory change to the child instead of
    # temporarily changing the working directory of this whole process.
    completed = subprocess.run(cmd,
                               cwd=ANDROID_BUILD_TOP,
                               stdout=subprocess.PIPE,
                               universal_newlines=True,
                               shell=True)
    return completed.stdout.strip()


# Computed once at import time.
SYMBOLS_DIR = FindSymbolsDir()

# None until SetBitness() has been called.
ARCH_IS_32BIT = None

VERBOSE = False

# These are private.
# Do not access them from other modules.
_CACHED_TOOLCHAIN = None
_CACHED_CXX_FILT = None

# Caches for symbolized information.
_SYMBOL_INFORMATION_ADDR2LINE_CACHE = {}
_SYMBOL_INFORMATION_OBJDUMP_CACHE = {}
_SYMBOL_DEMANGLING_CACHE = {}

# Caches for pipes to subprocesses.

class ProcessCache:
    """Least-recently-used cache of helper subprocesses keyed by command line.

    At most _PIPE_MAX_OPEN processes are kept; asking for a new command when
    the cache is full terminates the least recently used one.
    """

    # Max number of open pipes.
    _PIPE_MAX_OPEN = 10

    def __init__(self):
        # State is per instance. As class attributes the dict was shared by
        # every ProcessCache, so separate caches saw each other's processes.
        self._cmd2pipe = {}  # tuple(cmd) -> process
        self._lru = []       # [(tuple(cmd), process)], most recently used first

    def GetProcess(self, cmd):
        """Return the process for cmd, spawning it if it is not cached.

        Args:
          cmd: the command line as a sequence of strings.

        Returns:
          The cached or newly spawned process object.
        """
        cmd_tuple = tuple(cmd)  # Need to use a tuple as lists can't be dict keys.
        # Pipe already available?
        if cmd_tuple in self._cmd2pipe:
            pipe = self._cmd2pipe[cmd_tuple]
            # Update LRU: move this entry to the front.
            self._lru = [(cmd_tuple, pipe)] + [i for i in self._lru if i[0] != cmd_tuple]
            return pipe

        # Not cached, yet. Open a new one.

        # Check if too many are open, close the old ones.
        while len(self._lru) >= self._PIPE_MAX_OPEN:
            open_cmd, open_pipe = self._lru.pop()
            del self._cmd2pipe[open_cmd]
            self.TerminateProcess(open_pipe)

        # Create and put into cache.
        pipe = self.SpawnProcess(cmd)
        self._cmd2pipe[cmd_tuple] = pipe
        self._lru = [(cmd_tuple, pipe)] + self._lru
        return pipe

    def SpawnProcess(self, cmd):
        """Start cmd with text-mode pipes attached to its stdin and stdout."""
        return subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                                universal_newlines=True)

    def TerminateProcess(self, pipe):
        """Close the process's pipes, terminate it and wait for it to exit."""
        pipe.stdin.close()
        pipe.stdout.close()
        pipe.terminate()
        pipe.wait()

    def KillAllProcesses(self):
        """Terminate every cached process and empty the cache."""
        for _, open_pipe in self._lru:
            self.TerminateProcess(open_pipe)
        # Reset the instance state. The original assigned to local variables
        # here, so terminated processes stayed cached and were handed out
        # again by GetProcess().
        self._cmd2pipe = {}
        self._lru = []


_PIPE_ADDR2LINE_CACHE = ProcessCache()
_PIPE_CPPFILT_CACHE = ProcessCache()


# Process cache cleanup on shutdown.
def CloseAllPipes():
    """Terminate every cached llvm-symbolizer and llvm-cxxfilt subprocess."""
    _PIPE_ADDR2LINE_CACHE.KillAllProcesses()
    _PIPE_CPPFILT_CACHE.KillAllProcesses()


atexit.register(CloseAllPipes)


def PipeTermHandler(signum, frame):
    """Signal handler: close all cached pipes, then exit at once with status 0."""
    CloseAllPipes()
    os._exit(0)


for sig in (signal.SIGABRT, signal.SIGINT, signal.SIGTERM):
    signal.signal(sig, PipeTermHandler)


def ToolPath(tool, toolchain=None):
    """Return a fully-qualified path to the specified tool, or just the tool if it's on PATH."""
    if shutil.which(tool):
        return tool
    if not toolchain:
        toolchain = FindToolchain()
    return os.path.join(toolchain, tool)


def FindToolchain():
    """Return the llvm-binutils directory, locating it on first use.

    Raises:
      Exception: the llvm-binutils-stable directory does not exist under
        ANDROID_BUILD_TOP.
    """
    global _CACHED_TOOLCHAIN
    if _CACHED_TOOLCHAIN:
        return _CACHED_TOOLCHAIN

    llvm_binutils_dir = ANDROID_BUILD_TOP + "/prebuilts/clang/host/linux-x86/llvm-binutils-stable/"
    if not os.path.exists(llvm_binutils_dir):
        raise Exception("Could not find llvm tool chain directory %s" % (llvm_binutils_dir))

    _CACHED_TOOLCHAIN = llvm_binutils_dir
    print("Using toolchain from:", _CACHED_TOOLCHAIN)
    return _CACHED_TOOLCHAIN


def SymbolInformation(lib, addr):
    """Look up symbol information about an address.

    Args:
      lib: library (or executable) pathname containing symbols
      addr: string hexadecimal address

    Returns:
      A list of the form [(source_symbol, source_location,
      object_symbol_with_offset)].

      If the function has been inlined then the list may contain
      more than one element with the symbols for the most deeply
      nested inlined location appearing first.  The list is
      always non-empty, even if no information is available.

      Usually you want to display the source_location and
      object_symbol_with_offset from the last element in the list.
    """
    info = SymbolInformationForSet(lib, {addr})
    return (info and info.get(addr)) or [(None, None, None)]


def SymbolInformationForSet(lib, unique_addrs):
    """Look up symbol information for a set of addresses from the given library.

    Args:
      lib: library (or executable) pathname containing symbols
      unique_addrs: set of hexadecimal addresses

    Returns:
      A dictionary of the form {addr: [(source_symbol, source_location,
      object_symbol_with_offset)]} where each address has a list of
      associated symbols and locations.  The list is always non-empty.
      None is returned when lib is empty or when either the symbolizer or
      the objdump lookup produced nothing.

      If the function has been inlined then the list may contain
      more than one element with the symbols for the most deeply
      nested inlined location appearing first.  The list is
      always non-empty, even if no information is available.

      Usually you want to display the source_location and
      object_symbol_with_offset from the last element in the list.
    """
    if not lib:
        return None

    addr_to_line = CallLlvmSymbolizerForSet(lib, unique_addrs)
    if not addr_to_line:
        return None

    addr_to_objdump = CallObjdumpForSet(lib, unique_addrs)
    if not addr_to_objdump:
        return None

    result = {}
    for addr in unique_addrs:
        source_info = addr_to_line.get(addr) or [(None, None)]
        if addr in addr_to_objdump:
            object_symbol, object_offset = addr_to_objdump[addr]
            object_symbol_with_offset = FormatSymbolWithOffset(object_symbol,
                                                               object_offset)
        else:
            object_symbol_with_offset = None
        result[addr] = [(source_symbol, source_location, object_symbol_with_offset)
                        for (source_symbol, source_location) in source_info]

    return result


def CallLlvmSymbolizerForSet(lib, unique_addrs):
    """Look up line and symbol information for a set of addresses.

    Args:
      lib: library (or executable) pathname containing symbols
      unique_addrs: set of string hexadecimal addresses to look up.

    Returns:
      A dictionary of the form {addr: [(symbol, file:line)]} where
      each address has a list of associated symbols and locations
      or an empty list if no symbol information was found.
      None is returned when lib is empty or the symbols file cannot be
      found (or is a directory).

      If the function has been inlined then the list may contain
      more than one element with the symbols for the most deeply
      nested inlined location appearing first.
    """
    if not lib:
        return None

    result = {}
    addr_cache = _SYMBOL_INFORMATION_ADDR2LINE_CACHE.setdefault(lib, {})

    # Answer everything we can from the cache; the rest must be symbolized.
    addrs = []
    for addr in sorted(unique_addrs):
        if addr in addr_cache:
            result[addr] = addr_cache[addr]
        else:
            addrs.append(addr)

    if not addrs:
        # Everything was cached (or nothing was asked for), we're done.
        return result

    symbols = SYMBOLS_DIR + lib
    if not os.path.exists(symbols):
        symbols = lib
        if not os.path.exists(symbols):
            return None

    # Make sure the symbols path is not a directory.
    if os.path.isdir(symbols):
        return None

    cmd = [ToolPath("llvm-symbolizer"), "--functions", "--inlines",
           "--demangle", "--obj=" + symbols, "--output-style=JSON"]
    child = _PIPE_ADDR2LINE_CACHE.GetProcess(cmd)

    for addr in addrs:
        try:
            child.stdin.write("0x%s\n" % addr)
            child.stdin.flush()
            json_result = json.loads(child.stdout.readline().strip())
            # GNU style location: file_name:line_num
            records = [(symbol["FunctionName"],
                        "%s:%s" % (symbol["FileName"], symbol["Line"]))
                       for symbol in json_result["Symbol"]]
        except (IOError, ValueError) as e:
            # ValueError covers json.JSONDecodeError, which is what a dead
            # symbolizer produces (readline() returns an empty line). Report
            # it per address just like a pipe I/O error.
            # Remove the / in front of the library name to match other output.
            records = [(None, lib[1:] + " ***Error: " + str(e))]
        result[addr] = records
        addr_cache[addr] = records
    return result


def CallObjdumpForSet(lib, unique_addrs):
    """Use objdump to find out the names of the containing functions.

    Args:
      lib: library (or executable) pathname containing symbols
      unique_addrs: set of string hexadecimal addresses to find the functions for.

    Returns:
      A dictionary of the form {addr: (string symbol, offset)}, or None when
      lib is empty or the symbols file cannot be found.
    """
    if not lib:
        return None

    result = {}
    addr_cache = _SYMBOL_INFORMATION_OBJDUMP_CACHE.setdefault(lib, {})

    # Answer everything we can from the cache; the rest must be looked up.
    # Sort by numeric value: a plain string sort puts "100" before "ff", which
    # would break both the start/stop range below and the in-order scan of the
    # disassembly.
    addrs = []
    for addr in sorted(unique_addrs, key=lambda a: int(a, 16)):
        if addr in addr_cache:
            result[addr] = addr_cache[addr]
        else:
            addrs.append(addr)

    if not addrs:
        # Everything was cached (or nothing was asked for), we're done.
        return result

    symbols = SYMBOLS_DIR + lib
    if not os.path.exists(symbols):
        symbols = lib
        if not os.path.exists(symbols):
            return None

    targets = [int(addr, 16) for addr in addrs]
    start_addr_dec = str(targets[0])
    stop_addr_dec = str(targets[-1] + 8)
    cmd = [ToolPath("llvm-objdump"),
           "--section=.text",
           "--demangle",
           "--disassemble",
           "--start-address=" + start_addr_dec,
           "--stop-address=" + stop_addr_dec,
           symbols]

    # Function lines look like:
    #   000177b0 <android::IBinder::~IBinder()+0x2c>:
    # We pull out the address and function first. Then we check for an optional
    # offset. This is tricky due to functions that look like "operator+(..)+0x2c"
    func_regexp = re.compile(r"(^[a-f0-9]*) \<(.*)\>:$")
    offset_regexp = re.compile(r"(.*)\+0x([a-f0-9]*)")

    # A disassembly line looks like:
    #   177b2:	b510      	push	{r4, lr}
    asm_regexp = re.compile(r"(^[ a-f0-9]*):[ a-f0-9]*.*$")

    current_symbol = None    # The current function symbol in the disassembly.
    current_symbol_addr = 0  # The address of the current function.
    addr_index = 0           # The address that we are currently looking for.

    # The context manager closes the pipe and waits for objdump, so the child
    # is always reaped, including when we stop reading early.
    with subprocess.Popen(cmd, stdout=subprocess.PIPE,
                          universal_newlines=True) as objdump:
        for line in objdump.stdout:
            # Is it a function line like:
            #   000177b0 <android::IBinder::~IBinder()>:
            components = func_regexp.match(line)
            if components:
                # This is a new function, so record the current function and its address.
                current_symbol_addr = int(components.group(1), 16)
                current_symbol = components.group(2)

                # Does it have an optional offset like: "foo(..)+0x2c"?
                components = offset_regexp.match(current_symbol)
                if components:
                    current_symbol = components.group(1)
                    offset = components.group(2)
                    if offset:
                        current_symbol_addr -= int(offset, 16)

            # Is it a disassembly line like:
            #   177b2:	b510      	push	{r4, lr}
            components = asm_regexp.match(line)
            if components:
                i_addr = int(components.group(1), 16)
                i_target = targets[addr_index]
                if i_addr == i_target:
                    target_addr = addrs[addr_index]
                    result[target_addr] = (current_symbol, i_target - current_symbol_addr)
                    addr_cache[target_addr] = result[target_addr]
                    addr_index += 1
                    if addr_index >= len(addrs):
                        break

    return result


def CallCppFilt(mangled_symbol):
    """Demangle a symbol with llvm-cxxfilt, caching the answer.

    The llvm-cxxfilt binary is located on first use: under FindClangDir() when
    that returns a directory, otherwise through the relative glob
    ./clang-r*/bin/llvm-cxxfilt.

    Args:
      mangled_symbol: the symbol name to demangle.

    Returns:
      The line llvm-cxxfilt printed for the symbol, stripped of whitespace.

    Raises:
      Exception: llvm-cxxfilt could not be found, or the glob matched more
        than one candidate.
    """
    if mangled_symbol in _SYMBOL_DEMANGLING_CACHE:
        return _SYMBOL_DEMANGLING_CACHE[mangled_symbol]

    global _CACHED_CXX_FILT
    if not _CACHED_CXX_FILT:
        clang_dir = FindClangDir()
        if clang_dir:
            if not os.path.exists(clang_dir + "/bin/llvm-cxxfilt"):
                raise Exception("bin/llvm-cxxfilt missing from " + clang_dir)
            toolchains = [clang_dir + "/bin/llvm-cxxfilt"]
        else:
            # When run in CI, we don't have a way to find the clang version.  But
            # llvm-cxxfilt should be available in the following relative path.
            toolchains = glob.glob("./clang-r*/bin/llvm-cxxfilt")
            if len(toolchains) > 1:
                raise Exception("Expected one llvm-cxxfilt but found many: " +
                                ", ".join(toolchains))
        if not toolchains:
            raise Exception("Could not find llvm-cxxfilt tool")
        _CACHED_CXX_FILT = sorted(toolchains)[-1]

    cmd = [_CACHED_CXX_FILT]
    process = _PIPE_CPPFILT_CACHE.GetProcess(cmd)
    process.stdin.write(mangled_symbol)
    process.stdin.write("\n")
    process.stdin.flush()

    demangled_symbol = process.stdout.readline().strip()

    _SYMBOL_DEMANGLING_CACHE[mangled_symbol] = demangled_symbol

    return demangled_symbol


def FormatSymbolWithOffset(symbol, offset):
    """Return "symbol+offset" (decimal offset), or just symbol when offset is 0."""
    if offset == 0:
        return symbol
    return "%s+%d" % (symbol, offset)


def FormatSymbolWithoutParameters(symbol):
    """Remove parameters from function.

    Rather than trying to parse the demangled C++ signature,
    it just removes matching top level parenthesis.

    Returns:
      The symbol with top-level "(...)" groups removed and surrounding
      whitespace stripped, or the symbol unchanged when it is empty or its
      brackets do not pair up.
    """
    if not symbol:
        return symbol

    result = symbol
    result = result.replace(") const", ")")                  # Strip const keyword.
    result = result.replace("operator<<", "operator\u00AB")  # Avoid unmatched '<'.
    result = result.replace("operator>>", "operator\u00BB")  # Avoid unmatched '>'.
    result = result.replace("operator->", "operator\u2192")  # Avoid unmatched '>'.

    nested = []  # Keeps track of current nesting level of parenthesis.
    for i in reversed(range(len(result))):  # Iterate backward to make cutting easier.
        c = result[i]
        if c == ')' or c == '>':
            if len(nested) == 0:
                end = i + 1  # Mark the end of top-level pair.
            nested.append(c)
        if c == '(' or c == '<':
            if len(nested) == 0 or {')': '(', '>': '<'}[nested.pop()] != c:
                return symbol  # Malformed: character does not match its pair.
            # Only cut groups longer than "()" so "operator()" survives.
            if len(nested) == 0 and c == '(' and (end - i) > 2:
                result = result[:i] + result[end:]  # Remove substring (i, end).
    if len(nested) > 0:
        return symbol  # Malformed: missing pair.

    return result.strip()


def SetBitness(lines):
    """Set the global ARCH_IS_32BIT by inspecting stack trace lines.

    Args:
      lines: iterable of text lines to scan for backtrace frames.

    ARCH_IS_32BIT is left False when no line gives a hint.
    """
    global ARCH_IS_32BIT

    trace_line = re.compile(r"\#[0-9]+[ \t]+..[ \t]+([0-9a-f]{8}|[0-9a-f]{16})([ \t]+|$)")
    asan_trace_line = re.compile(r"\#[0-9]+[ \t]+0x([0-9a-f]+)[ \t]+")

    ARCH_IS_32BIT = False
    for line in lines:
        trace_match = trace_line.search(line)
        if trace_match:
            # Try to guess the arch, we know the bitness.
            if len(trace_match.group(1)) == 16:
                ARCH_IS_32BIT = False
            else:
                ARCH_IS_32BIT = True
            break
        asan_trace_match = asan_trace_line.search(line)
        if asan_trace_match:
            # We might be able to guess the bitness by the length of the address.
            if len(asan_trace_match.group(1)) > 8:
                ARCH_IS_32BIT = False
                # We know for a fact this is 64 bit, so we are done.
                break
            else:
                # This might be 32 bit, or just a small address. Keep going in this
                # case, but if we couldn't figure anything else out, go with 32 bit.
                ARCH_IS_32BIT = True


class FindClangDirTests(unittest.TestCase):
    @unittest.skipIf(ANDROID_BUILD_TOP == '.', 'Test only supported in an Android tree.')
    def test_clang_dir_found(self):
        self.assertIsNotNone(FindClangDir())


class SetBitnessTests(unittest.TestCase):
    def test_32bit_check(self):
        global ARCH_IS_32BIT

        SetBitness(["#00 pc 000374e0"])
        self.assertTrue(ARCH_IS_32BIT)

    def test_64bit_check(self):
        global ARCH_IS_32BIT

        SetBitness(["#00 pc 00000000000374e0"])
        self.assertFalse(ARCH_IS_32BIT)

    def test_32bit_asan_trace_line_toolchain(self):
        global ARCH_IS_32BIT

        SetBitness(["#10 0xb5eeba5d  (/system/vendor/lib/egl/libGLESv1_CM_adreno.so+0xfa5d)"])
        self.assertTrue(ARCH_IS_32BIT)

    def test_64bit_asan_trace_line_toolchain(self):
        global ARCH_IS_32BIT

        SetBitness(["#12 0x5d33bf  (/system/lib/libclang_rt.asan-arm-android.so+0x823bf)",
                    "#12 0x11b35d33bf  (/system/lib/libclang_rt.asan-arm-android.so+0x823bf)"])
        self.assertFalse(ARCH_IS_32BIT)


class FormatSymbolWithoutParametersTests(unittest.TestCase):
    def test_c(self):
        self.assertEqual(FormatSymbolWithoutParameters("foo"), "foo")
        self.assertEqual(FormatSymbolWithoutParameters("foo+42"), "foo+42")

    def test_simple(self):
        self.assertEqual(FormatSymbolWithoutParameters("foo(int i)"), "foo")
        self.assertEqual(FormatSymbolWithoutParameters("foo(int i)+42"), "foo+42")
        self.assertEqual(FormatSymbolWithoutParameters("bar::foo(int i)+42"), "bar::foo+42")
        self.assertEqual(FormatSymbolWithoutParameters("operator()"), "operator()")

    def test_templates(self):
        self.assertEqual(FormatSymbolWithoutParameters("bar::foo<T>(vector<T>& v)"), "bar::foo<T>")
        self.assertEqual(FormatSymbolWithoutParameters("bar<T>::foo(vector<T>& v)"), "bar<T>::foo")
        self.assertEqual(FormatSymbolWithoutParameters("bar::foo<T>(vector<T<U>>& v)"), "bar::foo<T>")
        self.assertEqual(FormatSymbolWithoutParameters("bar::foo<(EnumType)0>(vector<(EnumType)0>& v)"),
                         "bar::foo<(EnumType)0>")

    def test_nested(self):
        self.assertEqual(FormatSymbolWithoutParameters("foo(int i)::bar(int j)"), "foo::bar")

    def test_unbalanced(self):
        self.assertEqual(FormatSymbolWithoutParameters("foo(bar(int i)"), "foo(bar(int i)")
        self.assertEqual(FormatSymbolWithoutParameters("foo)bar(int i)"), "foo)bar(int i)")
        self.assertEqual(FormatSymbolWithoutParameters("foo<bar(int i)"), "foo<bar(int i)")
        self.assertEqual(FormatSymbolWithoutParameters("foo>bar(int i)"), "foo>bar(int i)")


if __name__ == '__main__':
    unittest.main(verbosity=2)