#!/usr/bin/env python3
#
# Copyright (C) 2013 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Module for looking up symbolic debugging information.

The information can include symbol names, offsets, and source locations.
"""

import atexit
import json
import glob
import os
import platform
import re
import shutil
import signal
import subprocess
import unittest

ANDROID_BUILD_TOP = os.environ.get("ANDROID_BUILD_TOP", ".")


def FindClangDir():
  """Return the prebuilt clang directory for the current tree, or None.

  Runs build/soong/scripts/get_clang_version.py (relative to
  ANDROID_BUILD_TOP) when that script exists and appends its output to the
  linux-x86 prebuilts path.

  Returns:
    Path string of the clang prebuilt directory, or None when the version
    script does not exist.

  Raises:
    subprocess.CalledProcessError: the version script exists but failed.
  """
  get_clang_version = ANDROID_BUILD_TOP + "/build/soong/scripts/get_clang_version.py"
  if os.path.exists(get_clang_version):
    # We want the script to fail if get_clang_version.py exists but is unable
    # to find the clang version.
    version_output = subprocess.check_output(get_clang_version, text=True)
    return ANDROID_BUILD_TOP + "/prebuilts/clang/host/linux-x86/" + version_output.strip()
  else:
    return None


def FindSymbolsDir():
  """Ask the build system for the absolute TARGET_OUT_UNSTRIPPED directory.

  The command is run with ANDROID_BUILD_TOP as its working directory; the
  working directory of this process is left untouched.

  Returns:
    The stripped standard output of the command. This is an empty string when
    the command prints nothing (its exit status is deliberately not checked).
  """
  cmd = "build/soong/soong_ui.bash --dumpvar-mode --abs TARGET_OUT_UNSTRIPPED"
  # subprocess.run waits for the child, so no process or pipe is left behind.
  completed = subprocess.run(cmd, cwd=ANDROID_BUILD_TOP, shell=True,
                             stdout=subprocess.PIPE, universal_newlines=True)
  return str(completed.stdout.strip())

SYMBOLS_DIR = FindSymbolsDir()

ARCH_IS_32BIT = None

VERBOSE = False

# These are private. Do not access them from other modules.
_CACHED_TOOLCHAIN = None
_CACHED_CXX_FILT = None

# Caches for symbolized information.
_SYMBOL_INFORMATION_ADDR2LINE_CACHE = {}
_SYMBOL_INFORMATION_OBJDUMP_CACHE = {}
_SYMBOL_DEMANGLING_CACHE = {}

# Caches for pipes to subprocesses.

class ProcessCache:
  """Least-recently-used cache of helper subprocesses keyed by command line.

  At most _PIPE_MAX_OPEN processes are kept; asking for a new command when the
  cache is full terminates the least recently used one.
  """

  # Max number of open pipes.
  _PIPE_MAX_OPEN = 10

  def __init__(self):
    # State is per instance so that separate caches never share processes.
    self._cmd2pipe = {}  # tuple(cmd) -> process
    self._lru = []       # [(tuple(cmd), process)], most recently used first.

  def GetProcess(self, cmd):
    """Return the process for cmd, spawning (and caching) it when needed.

    Args:
      cmd: list of command line arguments.

    Returns:
      The cached or newly spawned process object.
    """
    cmd_tuple = tuple(cmd)  # Need to use a tuple as lists can't be dict keys.
    # Pipe already available?
    if cmd_tuple in self._cmd2pipe:
      pipe = self._cmd2pipe[cmd_tuple]
      # Update LRU.
      self._lru = [(cmd_tuple, pipe)] + [i for i in self._lru if i[0] != cmd_tuple]
      return pipe

    # Not cached, yet. Open a new one.

    # Check if too many are open, close the old ones.
    while len(self._lru) >= self._PIPE_MAX_OPEN:
      open_cmd, open_pipe = self._lru.pop()
      del self._cmd2pipe[open_cmd]
      self.TerminateProcess(open_pipe)

    # Create and put into cache.
    pipe = self.SpawnProcess(cmd)
    self._cmd2pipe[cmd_tuple] = pipe
    self._lru = [(cmd_tuple, pipe)] + self._lru
    return pipe

  def SpawnProcess(self, cmd):
    """Start cmd with text-mode pipes attached to its stdin and stdout."""
    return subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, universal_newlines=True)

  def TerminateProcess(self, pipe):
    """Close the pipes of a process and stop it if it is still running."""
    # Close the pipes even when the child already exited, otherwise their file
    # descriptors stay open until garbage collection.
    for stream in (pipe.stdin, pipe.stdout):
      if stream is None:
        continue
      try:
        stream.close()
      except OSError:
        # Best-effort cleanup: closing stdin flushes it, which fails with a
        # broken pipe when the child is already gone.
        pass
    if pipe.poll() is None:
      # Process is still running.
      pipe.terminate()
      pipe.wait()

  def KillAllProcesses(self):
    """Terminate every cached process and empty the cache."""
    for _, open_pipe in self._lru:
      self.TerminateProcess(open_pipe)
    # Reset the instance state so terminated processes are never handed out.
    self._cmd2pipe = {}
    self._lru = []


_PIPE_ADDR2LINE_CACHE = ProcessCache()
_PIPE_CPPFILT_CACHE = ProcessCache()


# Process cache cleanup on shutdown.

def CloseAllPipes():
  """Terminate all cached symbolizer and demangler subprocesses."""
  _PIPE_ADDR2LINE_CACHE.KillAllProcesses()
  _PIPE_CPPFILT_CACHE.KillAllProcesses()


atexit.register(CloseAllPipes)


def PipeTermHandler(signum, frame):
  """Signal handler: stop the cached subprocesses, then exit with status 0."""
  CloseAllPipes()
  os._exit(0)


for sig in (signal.SIGABRT, signal.SIGINT, signal.SIGTERM):
  signal.signal(sig, PipeTermHandler)




def ToolPath(tool, toolchain=None):
  """Return a fully-qualified path to the specified tool, or just the tool if it's on PATH """
  if shutil.which(tool):
    return tool
  if not toolchain:
    toolchain = FindToolchain()
  return os.path.join(toolchain, tool)


def FindToolchain():
  """Returns the toolchain.

  The llvm-binutils-stable prebuilt directory under ANDROID_BUILD_TOP is used.
  The result is cached, so the directory check and the informational print
  happen only on the first successful call.

  Raises:
    Exception: the toolchain directory does not exist.
  """

  global _CACHED_TOOLCHAIN
  if _CACHED_TOOLCHAIN:
    return _CACHED_TOOLCHAIN

  llvm_binutils_dir = ANDROID_BUILD_TOP + "/prebuilts/clang/host/linux-x86/llvm-binutils-stable/"
  if not os.path.exists(llvm_binutils_dir):
    raise Exception("Could not find llvm tool chain directory %s" % (llvm_binutils_dir))

  _CACHED_TOOLCHAIN = llvm_binutils_dir
  print("Using toolchain from:", _CACHED_TOOLCHAIN)
  return _CACHED_TOOLCHAIN


def SymbolInformation(lib, addr):
  """Look up symbol information about an address.

  Args:
    lib: library (or executable) pathname containing symbols
    addr: string hexadecimal address

  Returns:
    A list of the form [(source_symbol, source_location,
    object_symbol_with_offset)].

    If the function has been inlined then the list may contain
    more than one element with the symbols for the most deeply
    nested inlined location appearing first.  The list is
    always non-empty, even if no information is available.

    Usually you want to display the source_location and
    object_symbol_with_offset from the last element in the list.
  """
  info = SymbolInformationForSet(lib, {addr})
  return (info and info.get(addr)) or [(None, None, None)]


def SymbolInformationForSet(lib, unique_addrs):
  """Look up symbol information for a set of addresses from the given library.

  Args:
    lib: library (or executable) pathname containing symbols
    unique_addrs: set of hexadecimal addresses

  Returns:
    A dictionary of the form {addr: [(source_symbol, source_location,
    object_symbol_with_offset)]} where each address has a list of
    associated symbols and locations.  The list is always non-empty.

    None is returned when lib is empty or when either the symbolizer or
    objdump lookup yields nothing.

    If the function has been inlined then the list may contain
    more than one element with the symbols for the most deeply
    nested inlined location appearing first.  The list is
    always non-empty, even if no information is available.

    Usually you want to display the source_location and
    object_symbol_with_offset from the last element in the list.
  """
  if not lib:
    return None

  addr_to_line = CallLlvmSymbolizerForSet(lib, unique_addrs)
  if not addr_to_line:
    return None

  addr_to_objdump = CallObjdumpForSet(lib, unique_addrs)
  if not addr_to_objdump:
    return None

  result = {}
  for addr in unique_addrs:
    source_info = addr_to_line.get(addr)
    if not source_info:
      source_info = [(None, None)]
    if addr in addr_to_objdump:
      (object_symbol, object_offset) = addr_to_objdump.get(addr)
      object_symbol_with_offset = FormatSymbolWithOffset(object_symbol,
                                                         object_offset)
    else:
      object_symbol_with_offset = None
    result[addr] = [(source_symbol, source_location, object_symbol_with_offset)
        for (source_symbol, source_location) in source_info]

  return result


def _OptionalStackRecordField(json_result, field):
  """Fix up bizarre formatting of llvm-symbolizer output

  Some parts of the FRAME output are output as a string containing a hex
  integer, or the empty string when it's missing.

  Args:
    json_result: dictionary containing the Frame response
    field: name of the field we want to read

  Returns:
    integer of field value, or None if missing
  """
  value = json_result.get(field, "")
  if isinstance(value, int):
    # Leaving this here in case someone decides to fix the types of the
    # symbolizer output, so it's easier to roll out.
    return value
  if value != "":
    return int(value, 16)
  return None


def _GetJSONSymbolizerForLib(lib, args=None):
  """ Find symbol file for lib, and return a llvm-symbolizer instance for it.

  The file is looked up under SYMBOLS_DIR first and then at lib itself.

  Args:
    lib: library (or executable) pathname containing symbols
    args: (optional) list of arguments to pass to llvm-symbolizer

  Returns:
    child process, or None if lib not found
  """
  if args is None:
    args = []
  symbols = SYMBOLS_DIR + lib
  if not os.path.exists(symbols):
    symbols = lib
    if not os.path.exists(symbols):
      return None

  # Make sure the symbols path is not a directory.
  if os.path.isdir(symbols):
    return None

  cmd = [ToolPath("llvm-symbolizer"), "--output-style=JSON"] + args + ["--obj=" + symbols]
  return _PIPE_ADDR2LINE_CACHE.GetProcess(cmd)


def GetStackRecordsForSet(lib, unique_addrs):
  """Look up stack record information for a set of addresses

  Args:
    lib: library (or executable) pathname containing symbols
    unique_addrs: set of integer addresses to look up.

  Returns:
    A list of tuples
    (addr, function_name, local_name, file_line, frame_offset, size, tag_offset)
    describing the local variables of the stack frame.
    frame_offset, size, tag_offset may be None.

    None is returned when no symbolizer is available for lib or the
    symbolizer process has already exited.
  """
  child = _GetJSONSymbolizerForLib(lib)
  if child is None or child.poll() is not None:
    return None
  records = []
  for addr in unique_addrs:
    child.stdin.write("FRAME 0x%x\n" % addr)
    child.stdin.flush()
    json_result = json.loads(child.stdout.readline().strip())
    for frame in json_result["Frame"]:
      records.append(
        (addr,
        frame["FunctionName"],
        frame["Name"],
        frame["DeclFile"] + ":" + str(frame["DeclLine"]),
        frame.get("FrameOffset"),
        _OptionalStackRecordField(frame, "Size"),
        _OptionalStackRecordField(frame, "TagOffset")))
  return records


def CallLlvmSymbolizerForSet(lib, unique_addrs):
  """Look up line and symbol information for a set of addresses.

  Results (including error records) are remembered per library, so each
  address is sent to llvm-symbolizer at most once.

  Args:
    lib: library (or executable) pathname containing symbols
    unique_addrs: set of string hexadecimal addresses to look up.

  Returns:
    A dictionary of the form {addr: [(symbol, file:line)]} where
    each address has a list of associated symbols and locations
    or an empty list if no symbol information was found.

    None is returned when lib is empty or no symbolizer could be started
    for it.

    If the function has been inlined then the list may contain
    more than one element with the symbols for the most deeply
    nested inlined location appearing first.
  """
  if not lib:
    return None

  result = {}
  addrs = sorted(unique_addrs)

  addr_cache = _SYMBOL_INFORMATION_ADDR2LINE_CACHE.get(lib)
  if addr_cache is not None:
    # Go through and handle all known addresses; keep the rest for lookup.
    pending = []
    for addr in addrs:
      if addr in addr_cache:
        result[addr] = addr_cache[addr]
      else:
        pending.append(addr)
    addrs = pending

    if not addrs:
      # Everything was cached, we're done.
      return result
  else:
    addr_cache = {}
    _SYMBOL_INFORMATION_ADDR2LINE_CACHE[lib] = addr_cache

  child = _GetJSONSymbolizerForLib(
      lib, ["--functions", "--inlines", "--demangle"])
  if child is None:
    return None
  for addr in addrs:
    try:
      child.stdin.write("0x%s\n" % addr)
      child.stdin.flush()
      records = []
      json_result = json.loads(child.stdout.readline().strip())
      if "Symbol" in json_result:
        for symbol in json_result["Symbol"]:
          function_name = symbol["FunctionName"]
          # GNU style location: file_name:line_num
          location = ("%s:%s" % (symbol["FileName"], symbol["Line"]))
          records.append((function_name, location))
    except (IOError, ValueError) as e:
      # ValueError covers json.JSONDecodeError, raised when the symbolizer
      # answers with an empty or malformed line (e.g. because it died).
      # Remove the / in front of the library name to match other output.
      records = [(None, lib[1:] + "  ***Error: " + str(e))]
    result[addr] = records
    addr_cache[addr] = records
  return result


def CallObjdumpForSet(lib, unique_addrs):
  """Use objdump to find out the names of the containing functions.

  Found addresses are remembered per library, so a later call only
  disassembles for addresses that were not resolved before.

  Args:
    lib: library (or executable) pathname containing symbols
    unique_addrs: set of string hexadecimal addresses to find the functions for.

  Returns:
    A dictionary of the form {addr: (string symbol, offset)}. Addresses that
    do not appear as an instruction address in the disassembly are absent.
    None is returned when lib is empty or its symbol file does not exist.
  """
  if not lib:
    return None

  result = {}

  addr_cache = _SYMBOL_INFORMATION_OBJDUMP_CACHE.get(lib)
  if addr_cache is not None:
    # Go through and handle all known addresses; keep the rest for lookup.
    pending = []
    for addr in unique_addrs:
      if addr in addr_cache:
        result[addr] = addr_cache[addr]
      else:
        pending.append(addr)

    if not pending:
      # Everything was cached, we're done.
      return result
  else:
    addr_cache = {}
    _SYMBOL_INFORMATION_OBJDUMP_CACHE[lib] = addr_cache
    pending = list(unique_addrs)

  symbols = SYMBOLS_DIR + lib
  if not os.path.exists(symbols):
    symbols = lib
    if not os.path.exists(symbols):
      return None

  # Order by numeric value: a plain string sort puts "ff" after "1000", which
  # would give a wrong address range and break the in-order matching below.
  addrs = sorted(pending, key=lambda hex_addr: int(hex_addr, 16))
  if not addrs:
    # Nothing to look up.
    return result

  start_addr_dec = str(int(addrs[0], 16))
  stop_addr_dec = str(int(addrs[-1], 16) + 8)
  cmd = [ToolPath("llvm-objdump"),
         "--section=.text",
         "--demangle",
         "--disassemble",
         "--start-address=" + start_addr_dec,
         "--stop-address=" + stop_addr_dec,
         symbols]

  # Function lines look like:
  #   000177b0 <android::IBinder::~IBinder()+0x2c>:
  # We pull out the address and function first. Then we check for an optional
  # offset. This is tricky due to functions that look like "operator+(..)+0x2c"
  func_regexp = re.compile(r"(^[a-f0-9]*) <(.*)>:$")
  offset_regexp = re.compile(r"(.*)\+0x([a-f0-9]*)")

  # A disassembly line looks like:
  #   177b2:  b510        push  {r4, lr}
  asm_regexp = re.compile(r"(^[ a-f0-9]*):[ a-f0-9]*.*$")

  current_symbol = None    # The current function symbol in the disassembly.
  current_symbol_addr = 0  # The address of the current function.
  addr_index = 0  # The address that we are currently looking for.

  # The context manager closes the pipe and reaps the child, also when the
  # loop stops reading early.
  with subprocess.Popen(cmd, stdout=subprocess.PIPE, universal_newlines=True) as objdump:
    for line in objdump.stdout:
      # Is it a function line like:
      #   000177b0 <android::IBinder::~IBinder()>:
      components = func_regexp.match(line)
      if components:
        # This is a new function, so record the current function and its address.
        current_symbol_addr = int(components.group(1), 16)
        current_symbol = components.group(2)

        # Does it have an optional offset like: "foo(..)+0x2c"?
        components = offset_regexp.match(current_symbol)
        if components:
          current_symbol = components.group(1)
          offset = components.group(2)
          if offset:
            current_symbol_addr -= int(offset, 16)

      # Is it an disassembly line like:
      #   177b2:  b510        push  {r4, lr}
      components = asm_regexp.match(line)
      if components:
        addr = components.group(1)
        target_addr = addrs[addr_index]
        i_addr = int(addr, 16)
        i_target = int(target_addr, 16)
        if i_addr == i_target:
          result[target_addr] = (current_symbol, i_target - current_symbol_addr)
          addr_cache[target_addr] = result[target_addr]
          addr_index += 1
          if addr_index >= len(addrs):
            break

  return result


def CallCppFilt(mangled_symbol):
  """Demangle a C++ symbol with llvm-cxxfilt.

  The llvm-cxxfilt binary is located once (from the clang prebuilts when
  FindClangDir() finds them, otherwise from ./clang-r*/bin) and results are
  cached per mangled symbol.

  Args:
    mangled_symbol: the symbol string to demangle.

  Returns:
    The line llvm-cxxfilt prints for the symbol, stripped of whitespace.

  Raises:
    Exception: llvm-cxxfilt is missing or more than one candidate was found.
  """
  if mangled_symbol in _SYMBOL_DEMANGLING_CACHE:
    return _SYMBOL_DEMANGLING_CACHE[mangled_symbol]

  global _CACHED_CXX_FILT
  if not _CACHED_CXX_FILT:
    toolchains = None
    clang_dir = FindClangDir()
    if clang_dir:
      if os.path.exists(clang_dir + "/bin/llvm-cxxfilt"):
        toolchains = [clang_dir + "/bin/llvm-cxxfilt"]
      else:
        raise Exception("bin/llvm-cxxfilt missing from " + clang_dir)
    else:
      # When run in CI, we don't have a way to find the clang version.  But
      # llvm-cxxfilt should be available in the following relative path.
      toolchains = glob.glob("./clang-r*/bin/llvm-cxxfilt")
      if toolchains and len(toolchains) != 1:
        raise Exception("Expected one llvm-cxxfilt but found many: " +
                        ", ".join(toolchains))
    if not toolchains:
      raise Exception("Could not find llvm-cxxfilt tool")
    _CACHED_CXX_FILT = sorted(toolchains)[-1]

  cmd = [_CACHED_CXX_FILT]
  process = _PIPE_CPPFILT_CACHE.GetProcess(cmd)
  process.stdin.write(mangled_symbol)
  process.stdin.write("\n")
  process.stdin.flush()

  demangled_symbol = process.stdout.readline().strip()

  _SYMBOL_DEMANGLING_CACHE[mangled_symbol] = demangled_symbol

  return demangled_symbol


def FormatSymbolWithOffset(symbol, offset):
  """Return "symbol+offset" (decimal offset), or just symbol when offset is 0."""
  if offset == 0:
    return symbol
  return "%s+%d" % (symbol, offset)

def FormatSymbolWithoutParameters(symbol):
  """Remove parameters from function.

  Rather than trying to parse the demangled C++ signature,
  it just removes matching top level parenthesis.

  Returns:
    The symbol without its top-level "(...)" groups, or the unchanged input
    when it is empty or its brackets are not balanced.
  """
  if not symbol:
    return symbol

  result = symbol
  result = result.replace(") const", ")")                  # Strip const keyword.
  result = result.replace("operator<<", "operator\u00AB")  # Avoid unmatched '<'.
  result = result.replace("operator>>", "operator\u00BB")  # Avoid unmatched '>'.
  result = result.replace("operator->", "operator\u2192")  # Avoid unmatched '>'.

  opener_for = {')': '(', '>': '<'}  # Closing bracket -> its opening bracket.
  nested = []  # Keeps track of current nesting level of parenthesis.
  for i in reversed(range(len(result))):  # Iterate backward to make cutting easier.
    c = result[i]
    if c == ')' or c == '>':
      if len(nested) == 0:
        end = i + 1  # Mark the end of top-level pair.
      nested.append(c)
    if c == '(' or c == '<':
      if len(nested) == 0 or opener_for[nested.pop()] != c:
        return symbol  # Malformed: character does not match its pair.
      # An empty "()" is kept, so that e.g. "operator()" survives.
      if len(nested) == 0 and c == '(' and (end - i) > 2:
        result = result[:i] + result[end:]  # Remove substring (i, end).
  if len(nested) > 0:
    return symbol  # Malformed: missing pair.

  return result.strip()

def SetBitness(lines):
  """Guess from stack trace lines whether the traced process is 32 bit.

  The result is stored in the module-level ARCH_IS_32BIT, which is False when
  no line gives a hint.

  Args:
    lines: iterable of stack trace text lines.
  """
  global ARCH_IS_32BIT

  trace_line = re.compile(r"#[0-9]+[ \t]+..[ \t]+([0-9a-f]{8}|[0-9a-f]{16})([ \t]+|$)")
  asan_trace_line = re.compile(r"#[0-9]+[ \t]+0x([0-9a-f]+)[ \t]+")

  ARCH_IS_32BIT = False
  for line in lines:
    trace_match = trace_line.search(line)
    if trace_match:
      # Try to guess the arch, we know the bitness.
      if len(trace_match.group(1)) == 16:
        ARCH_IS_32BIT = False
      else:
        ARCH_IS_32BIT = True
      break
    asan_trace_match = asan_trace_line.search(line)
    if asan_trace_match:
      # We might be able to guess the bitness by the length of the address.
      if len(asan_trace_match.group(1)) > 8:
        ARCH_IS_32BIT = False
        # We know for a fact this is 64 bit, so we are done.
        break
      else:
        # This might be 32 bit, or just a small address. Keep going in this
        # case, but if we couldn't figure anything else out, go with 32 bit.
        ARCH_IS_32BIT = True

class FindClangDirTests(unittest.TestCase):
  @unittest.skipIf(ANDROID_BUILD_TOP == '.', 'Test only supported in an Android tree.')
  def test_clang_dir_found(self):
    self.assertIsNotNone(FindClangDir())

class SetBitnessTests(unittest.TestCase):
  def test_32bit_check(self):
    global ARCH_IS_32BIT

    SetBitness(["#00 pc 000374e0"])
    self.assertTrue(ARCH_IS_32BIT)

  def test_64bit_check(self):
    global ARCH_IS_32BIT

    SetBitness(["#00 pc 00000000000374e0"])
    self.assertFalse(ARCH_IS_32BIT)

  def test_32bit_asan_trace_line_toolchain(self):
    global ARCH_IS_32BIT

    SetBitness(["#10 0xb5eeba5d  (/system/vendor/lib/egl/libGLESv1_CM_adreno.so+0xfa5d)"])
    self.assertTrue(ARCH_IS_32BIT)

  def test_64bit_asan_trace_line_toolchain(self):
    global ARCH_IS_32BIT

    SetBitness(["#12 0x5d33bf  (/system/lib/libclang_rt.asan-arm-android.so+0x823bf)",
                "#12 0x11b35d33bf  (/system/lib/libclang_rt.asan-arm-android.so+0x823bf)"])
    self.assertFalse(ARCH_IS_32BIT)

class FormatSymbolWithoutParametersTests(unittest.TestCase):
  def test_c(self):
    self.assertEqual(FormatSymbolWithoutParameters("foo"), "foo")
    self.assertEqual(FormatSymbolWithoutParameters("foo+42"), "foo+42")

  def test_simple(self):
    self.assertEqual(FormatSymbolWithoutParameters("foo(int i)"), "foo")
    self.assertEqual(FormatSymbolWithoutParameters("foo(int i)+42"), "foo+42")
    self.assertEqual(FormatSymbolWithoutParameters("bar::foo(int i)+42"), "bar::foo+42")
    self.assertEqual(FormatSymbolWithoutParameters("operator()"), "operator()")

  def test_templates(self):
    self.assertEqual(FormatSymbolWithoutParameters("bar::foo<T>(vector<T>& v)"), "bar::foo<T>")
    self.assertEqual(FormatSymbolWithoutParameters("bar<T>::foo(vector<T>& v)"), "bar<T>::foo")
    self.assertEqual(FormatSymbolWithoutParameters("bar::foo<T>(vector<T<U>>& v)"), "bar::foo<T>")
    self.assertEqual(FormatSymbolWithoutParameters("bar::foo<(EnumType)0>(vector<(EnumType)0>& v)"),
                     "bar::foo<(EnumType)0>")

  def test_nested(self):
    self.assertEqual(FormatSymbolWithoutParameters("foo(int i)::bar(int j)"), "foo::bar")

  def test_unbalanced(self):
    self.assertEqual(FormatSymbolWithoutParameters("foo(bar(int i)"), "foo(bar(int i)")
    self.assertEqual(FormatSymbolWithoutParameters("foo)bar(int i)"), "foo)bar(int i)")
    self.assertEqual(FormatSymbolWithoutParameters("foo<bar(int i)"), "foo<bar(int i)")
    self.assertEqual(FormatSymbolWithoutParameters("foo>bar(int i)"), "foo>bar(int i)")

if __name__ == '__main__':
  unittest.main(verbosity=2)