#!/usr/bin/python
#
# Copyright (C) 2013 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Module for looking up symbolic debugging information.

The information can include symbol names, offsets, and source locations.
"""

import glob
import itertools
import os
import re
import subprocess
import zipfile

CHROME_SRC = os.path.join(os.path.realpath(os.path.dirname(__file__)),
                          os.pardir, os.pardir, os.pardir, os.pardir)
ANDROID_BUILD_TOP = CHROME_SRC
SYMBOLS_DIR = CHROME_SRC
CHROME_SYMBOLS_DIR = CHROME_SRC

ARCH = "arm"

TOOLCHAIN_INFO = None

# Maps each supported ARCH value to (NDK toolchain directory, tool prefix).
_NDK_TOOLCHAINS = {
    "arm": ("arm-linux-androideabi-4.6", "arm-linux-androideabi"),
    "arm64": ("aarch64-linux-android-4.9", "aarch64-linux-android"),
    "x86": ("x86-4.6", "i686-android-linux"),
    "x86_64": ("x86_64-4.9", "x86_64-linux-android"),
    "mips": ("mipsel-linux-android-4.6", "mipsel-linux-android"),
}


def Uname():
    """'uname' for constructing prebuilt/<...> and out/host/<...> paths."""
    uname = os.uname()[0]
    if uname == "Darwin":
        proc = os.uname()[-1]
        if proc == "i386" or proc == "x86_64":
            return "darwin-x86"
        return "darwin-ppc"
    if uname == "Linux":
        return "linux-x86"
    return uname


def ToolPath(tool, toolchain_info=None):
    """Return a full qualified path to the specified tool.

    Args:
      tool: name of the binutils tool, e.g. "addr2line" or "objdump".
      toolchain_info: accepted for compatibility with callers; not used.

    Returns:
      The path of the tool for the current ARCH inside the checked in
      android_tools NDK.

    Raises:
      Exception: if ARCH is not one of the supported architectures.
    """
    # ToolPath looks for the tools in the completely incorrect directory.
    # This looks in the checked in android_tools.
    # ARCH is read at call time because callers may reassign it.
    if ARCH not in _NDK_TOOLCHAINS:
        raise Exception("Could not find tool chain")
    (toolchain_source, toolchain_prefix) = _NDK_TOOLCHAINS[ARCH]
    ndk = "ndk"

    toolchain_subdir = (
        "third_party/android_tools/%s/toolchains/%s/prebuilt/linux-x86_64/bin" %
        (ndk, toolchain_source))

    return os.path.join(CHROME_SRC,
                        toolchain_subdir,
                        toolchain_prefix + "-" + tool)


def FindToolchain():
    """Look for the latest available toolchain.

    The result is cached in the module-level TOOLCHAIN_INFO.

    Args:
      None

    Returns:
      A tuple of strings (toolchain label, platform, target prefix).

    Raises:
      Exception: if no known toolchain provides addr2line.
    """
    global TOOLCHAIN_INFO
    if TOOLCHAIN_INFO is not None:
        return TOOLCHAIN_INFO

    ## Known toolchains, newer ones in the front.
    if ARCH == "arm64":
        gcc_version = "4.9"
        known_toolchains = [
            ("aarch64-linux-android-" + gcc_version, "aarch64",
             "aarch64-linux-android"),
        ]
    elif ARCH == "arm":
        gcc_version = "4.6"
        known_toolchains = [
            ("arm-linux-androideabi-" + gcc_version, "arm",
             "arm-linux-androideabi"),
        ]
    elif ARCH == "x86":
        known_toolchains = [
            ("i686-android-linux-4.4.3", "x86", "i686-android-linux"),
        ]
    elif ARCH == "x86_64":
        known_toolchains = [
            ("x86_64-linux-android-4.9", "x86_64", "x86_64-linux-android"),
        ]
    elif ARCH == "mips":
        gcc_version = "4.6"
        known_toolchains = [
            ("mipsel-linux-android-" + gcc_version, "mips",
             "mipsel-linux-android"),
        ]
    else:
        known_toolchains = []

    # Look for addr2line to check for valid toolchain path.
    for toolchain_info in known_toolchains:
        if os.path.exists(ToolPath("addr2line", toolchain_info)):
            TOOLCHAIN_INFO = toolchain_info
            # Parenthesised single-argument form prints the same text on
            # Python 2 and Python 3.
            print("Using toolchain from :" + ToolPath("", TOOLCHAIN_INFO))
            return toolchain_info

    raise Exception("Could not find tool chain")


def GetAapt():
    """Returns the path to aapt.

    Args:
      None

    Returns:
      the pathname of the most recently modified 'aapt' executable found
      under the SDK build-tools directory, or None if there is none.
    """
    sdk_home = os.path.join('third_party', 'android_tools', 'sdk')
    sdk_home = os.environ.get('SDK_HOME', sdk_home)
    aapt_exe = glob.glob(os.path.join(sdk_home, 'build-tools', '*', 'aapt'))
    if not aapt_exe:
        return None
    return sorted(aapt_exe, key=os.path.getmtime, reverse=True)[0]


def ApkMatchPackageName(aapt, apk_path, package_name):
    """Returns true the APK's package name matches package_name.

    Args:
      aapt: pathname for the 'aapt' executable.
      apk_path: pathname of the APK file.
      package_name: package name to match.

    Returns:
      True if the package name matches or aapt is None, False otherwise.
    """
    if not aapt:
        # Allow false positives
        return True
    # universal_newlines=True makes the output text on both Python 2 and 3,
    # so that it can be split and matched against str patterns.
    aapt_output = subprocess.check_output(
        [aapt, 'dump', 'badging', apk_path],
        universal_newlines=True).split('\n')
    package_name_re = re.compile(r'package: .*name=\'(\S*)\'')
    for line in aapt_output:
        match = package_name_re.match(line)
        if match:
            return package_name == match.group(1)
    return False


def PathListJoin(prefix_list, suffix_list):
    """Returns each prefix in prefix_list joined with each suffix in suffix list.

    Args:
      prefix_list: list of path prefixes.
      suffix_list: list of path suffixes.

    Returns:
      List of paths each of which joins a prefix with a suffix.
    """
    return [
        os.path.join(prefix, suffix)
        for prefix in prefix_list for suffix in suffix_list]


def GetCandidates(dirs, filepart, candidate_fun):
    """Returns a list of candidate filenames.

    The search roots are <CHROMIUM_OUT_DIR or 'out'>/<build type> under
    CHROME_SYMBOLS_DIR, plus CHROME_SYMBOLS_DIR itself.  The build type comes
    from the BUILDTYPE environment variable when set, otherwise both 'Debug'
    and 'Release' are searched.

    Args:
      dirs: a list of the directory part of the pathname.
      filepart: the file part of the pathname.
      candidate_fun: a function to apply to each candidate, returns a list.

    Returns:
      A list of candidate files ordered by modification time, newest first.
    """
    out_dir = os.environ.get('CHROMIUM_OUT_DIR', 'out')
    out_dir = os.path.join(CHROME_SYMBOLS_DIR, out_dir)
    buildtype = os.environ.get('BUILDTYPE')
    if buildtype:
        buildtype_list = [buildtype]
    else:
        buildtype_list = ['Debug', 'Release']

    candidates = PathListJoin([out_dir], buildtype_list) + [CHROME_SYMBOLS_DIR]
    candidates = PathListJoin(candidates, dirs)
    candidates = PathListJoin(candidates, [filepart])
    candidates = list(
        itertools.chain.from_iterable(map(candidate_fun, candidates)))
    candidates = sorted(candidates, key=os.path.getmtime, reverse=True)
    return candidates


def GetCandidateApks():
    """Returns a list of APKs which could contain the library.

    Args:
      None

    Returns:
      list of APK filename which could contain the library.
    """
    return GetCandidates(['apks'], '*.apk', glob.glob)


def GetCrazyLib(apk_filename):
    """Returns the name of the first crazy library from this APK.

    Args:
      apk_filename: name of an APK file.

    Returns:
      Name of the first library which would be crazy loaded from this APK,
      or None if the APK contains no such library.
    """
    # The context manager closes the archive on every exit path.
    with zipfile.ZipFile(apk_filename, 'r') as zip_file:
        for filename in zip_file.namelist():
            match = re.match('lib/[^/]*/crazy.(lib.*[.]so)', filename)
            if match:
                return match.group(1)
    return None


def GetMatchingApks(device_apk_name):
    """Find any APKs which match the package indicated by the device_apk_name.

    Args:
      device_apk_name: name of the APK on the device.

    Returns:
      A list of APK filenames which could contain the desired library, or
      None if device_apk_name is not of the form <package_name>-<number>.apk.
    """
    match = re.match('(.*)-[0-9]+[.]apk$', device_apk_name)
    if not match:
        return None
    package_name = match.group(1)
    # Locate aapt once rather than once per candidate APK.
    aapt = GetAapt()
    return [candidate_apk for candidate_apk in GetCandidateApks()
            if ApkMatchPackageName(aapt, candidate_apk, package_name)]


def MapDeviceApkToLibrary(device_apk_name):
    """Provide a library name which corresponds with device_apk_name.

    Args:
      device_apk_name: name of the APK on the device.

    Returns:
      Name of the library which corresponds to that APK, or None if no
      matching APK containing a crazy library was found.
    """
    # GetMatchingApks returns None for names that do not look like a device
    # APK name; treat that the same as "no matching APKs".
    matching_apks = GetMatchingApks(device_apk_name) or []
    for matching_apk in matching_apks:
        crazy_lib = GetCrazyLib(matching_apk)
        if crazy_lib:
            return crazy_lib
    return None


def GetCandidateLibraries(library_name):
    """Returns a list of candidate library filenames.

    Args:
      library_name: basename of the library to match.

    Returns:
      A list of matching library filenames for library_name.
    """
    return GetCandidates(
        ['lib', 'lib.target'], library_name,
        lambda filename: [f for f in [filename] if os.path.exists(f)])


def TranslateLibPath(lib):
    """Translates a library path from a stack trace to a local symbols path.

    Args:
      lib: library (or APK) pathname as it appears in the stack trace.

    Returns:
      '/' followed by the path, relative to SYMBOLS_DIR, of the most recently
      modified matching library, or lib unchanged if no candidate was found.
    """
    # SymbolInformation(lib, addr) receives lib as the path from symbols
    # root to the symbols file. This needs to be translated to point to the
    # correct .so path. If the user doesn't explicitly specify which directory
    # to use, then use the most recently updated one in one of the known
    # directories.
    # If the .so is not found somewhere in CHROME_SYMBOLS_DIR, leave it
    # untranslated in case it is an Android symbol in SYMBOLS_DIR.
    library_name = os.path.basename(lib)

    # The filename in the stack trace may be an APK name rather than a library
    # name. This happens when the library was loaded directly from inside the
    # APK. If this is the case we try to figure out the library name by looking
    # for a matching APK file and finding the name of the library it contains.
    # The name of the APK file on the device is of the form
    # <package_name>-<number>.apk. The APK file on the host may have any name
    # so we look at the APK badging to see if the package name matches.
    if re.search('-[0-9]+[.]apk$', library_name):
        mapping = MapDeviceApkToLibrary(library_name)
        if mapping:
            library_name = mapping

    candidate_libraries = GetCandidateLibraries(library_name)
    if not candidate_libraries:
        return lib

    library_path = os.path.relpath(candidate_libraries[0], SYMBOLS_DIR)
    return '/' + library_path


def SymbolInformation(lib, addr, get_detailed_info):
    """Look up symbol information about an address.

    Args:
      lib: library (or executable) pathname containing symbols
      addr: string hexidecimal address
      get_detailed_info: if true, objdump is also run to obtain the object
          symbol and offset; otherwise only addr2line is used.

    Returns:
      A list of the form [(source_symbol, source_location,
      object_symbol_with_offset)].

      If the function has been inlined then the list may contain
      more than one element with the symbols for the most deeply
      nested inlined location appearing first.  The list is
      always non-empty, even if no information is available.

      Usually you want to display the source_location and
      object_symbol_with_offset from the last element in the list.
    """
    lib = TranslateLibPath(lib)
    info = SymbolInformationForSet(lib, set([addr]), get_detailed_info)
    return (info and info.get(addr)) or [(None, None, None)]


def SymbolInformationForSet(lib, unique_addrs, get_detailed_info):
    """Look up symbol information for a set of addresses from the given library.

    Args:
      lib: library (or executable) pathname containing symbols
      unique_addrs: set of hexidecimal addresses
      get_detailed_info: if true, objdump is also run to obtain the object
          symbol and offset; otherwise only addr2line is used.

    Returns:
      None if lib is empty or the underlying tools produced no information.
      Otherwise a dictionary of the form {addr: [(source_symbol,
      source_location, object_symbol_with_offset)]} where each address has a
      list of associated symbols and locations.  The list is always non-empty.

      If the function has been inlined then the list may contain
      more than one element with the symbols for the most deeply
      nested inlined location appearing first.  The list is
      always non-empty, even if no information is available.

      Usually you want to display the source_location and
      object_symbol_with_offset from the last element in the list.
    """
    if not lib:
        return None

    addr_to_line = CallAddr2LineForSet(lib, unique_addrs)
    if not addr_to_line:
        return None

    if get_detailed_info:
        addr_to_objdump = CallObjdumpForSet(lib, unique_addrs)
        if not addr_to_objdump:
            return None
    else:
        # Placeholder entries; FormatSymbolWithOffset turns ("", 0) into "".
        addr_to_objdump = dict((addr, ("", 0)) for addr in unique_addrs)

    result = {}
    for addr in unique_addrs:
        source_info = addr_to_line.get(addr)
        if not source_info:
            source_info = [(None, None)]
        if addr in addr_to_objdump:
            (object_symbol, object_offset) = addr_to_objdump.get(addr)
            object_symbol_with_offset = FormatSymbolWithOffset(object_symbol,
                                                               object_offset)
        else:
            object_symbol_with_offset = None
        result[addr] = [
            (source_symbol, source_location, object_symbol_with_offset)
            for (source_symbol, source_location) in source_info]

    return result


class MemoizedForSet(object):
    """Decorator caching the per-address results of fn(lib, unique_addrs).

    fn is expected to return a dictionary keyed by address (or a false value).
    Results are cached per library and per address, so fn is only invoked for
    addresses that have not been requested before for that library.
    """

    def __init__(self, fn):
        self.fn = fn
        self.cache = {}

    def __call__(self, lib, unique_addrs):
        """Returns {addr: value} for the addresses that have a true value."""
        lib_cache = self.cache.setdefault(lib, {})

        # Must be a real list: it is tested for emptiness, iterated to seed the
        # cache and then handed to fn.  A lazy filter() object (Python 3) would
        # always be true and would be exhausted before fn sees it.
        no_cache = [addr for addr in unique_addrs if addr not in lib_cache]
        if no_cache:
            # Seed with None so addresses fn reports nothing for are not
            # looked up again.
            lib_cache.update((k, None) for k in no_cache)
            result = self.fn(lib, no_cache)
            if result:
                lib_cache.update(result)

        return dict((k, lib_cache[k]) for k in unique_addrs if lib_cache[k])


@MemoizedForSet
def CallAddr2LineForSet(lib, unique_addrs):
    """Look up line and symbol information for a set of addresses.

    Args:
      lib: library (or executable) pathname containing symbols
      unique_addrs: set of string hexidecimal addresses look up.

    Returns:
      None if lib is empty or the symbols file does not exist.  Otherwise a
      dictionary of the form {addr: [(symbol, file:line)]} where
      each address has a list of associated symbols and locations
      or an empty list if no symbol information was found.

      If the function has been inlined then the list may contain
      more than one element with the symbols for the most deeply
      nested inlined location appearing first.
    """
    if not lib:
        return None

    symbols = SYMBOLS_DIR + lib
    if not os.path.isfile(symbols):
        return None

    # Called for its effects only: it raises if no toolchain is available and
    # reports the toolchain in use the first time it is found.
    FindToolchain()
    cmd = [ToolPath("addr2line"), "--functions", "--inlines",
           "--demangle", "--exe=" + symbols]
    # universal_newlines=True gives text pipes on both Python 2 and 3.
    child = subprocess.Popen(cmd, stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE, universal_newlines=True)

    result = {}
    try:
        for addr in sorted(unique_addrs):
            child.stdin.write("0x%s\n" % addr)
            child.stdin.flush()
            records = []
            first = True
            child_exited = False
            while True:
                symbol_line = child.stdout.readline()
                location_line = child.stdout.readline()
                if not symbol_line or not location_line:
                    # EOF: addr2line went away.  Without this check the loop
                    # would spin forever on empty reads.
                    child_exited = True
                    break
                symbol = symbol_line.strip()
                if symbol == "??":
                    symbol = None
                location = location_line.strip()
                if location == "??:0":
                    location = None
                if symbol is None and location is None:
                    break
                records.append((symbol, location))
                if first:
                    # Write a blank line as a sentinel so we know when to stop
                    # reading inlines from the output.
                    # The blank line will cause addr2line to emit "??\n??:0\n".
                    child.stdin.write("\n")
                    # Needed when stdin is buffered (Python 3 default).
                    child.stdin.flush()
                    first = False
            result[addr] = records
            if child_exited:
                break
    finally:
        child.stdin.close()
        child.stdout.close()
        # Reap the child so it does not linger as a zombie.
        child.wait()
    return result


def StripPC(addr):
    """Strips the Thumb bit a program counter address when appropriate.

    Args:
      addr: the program counter address

    Returns:
      The stripped program counter address.
    """
    if ARCH == "arm":
        return addr & ~1
    return addr


@MemoizedForSet
def CallObjdumpForSet(lib, unique_addrs):
    """Use objdump to find out the names of the containing functions.

    Args:
      lib: library (or executable) pathname containing symbols
      unique_addrs: set of string hexidecimal addresses to find the functions
          for.

    Returns:
      None if lib is empty or the symbols file does not exist.  Otherwise a
      dictionary of the form {addr: (string symbol, offset)}.
    """
    if not lib:
        return None

    symbols = SYMBOLS_DIR + lib
    if not os.path.exists(symbols):
        return None

    result = {}

    # Function lines look like:
    #   000177b0 <android::IBinder::~IBinder()+0x2c>:
    # We pull out the address and function first. Then we check for an optional
    # offset. This is tricky due to functions that look like
    # "operator+(..)+0x2c"
    func_regexp = re.compile(r"(^[a-f0-9]*) \<(.*)\>:$")
    offset_regexp = re.compile(r"(.*)\+0x([a-f0-9]*)")

    # A disassembly line looks like:
    #   177b2:  b510        push    {r4, lr}
    asm_regexp = re.compile(r"(^[ a-f0-9]*):[ a-f0-9]*.*$")

    for target_addr in unique_addrs:
        # The address actually disassembled; constant for this target.
        i_target = StripPC(int(target_addr, 16))
        start_addr_dec = str(i_target)
        stop_addr_dec = str(i_target + 8)
        cmd = [ToolPath("objdump"),
               "--section=.text",
               "--demangle",
               "--disassemble",
               "--start-address=" + start_addr_dec,
               "--stop-address=" + stop_addr_dec,
               symbols]

        current_symbol = None  # The current function symbol in the disassembly.
        current_symbol_addr = 0  # The address of the current function.

        # universal_newlines=True gives text output on both Python 2 and 3.
        child = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                 universal_newlines=True)
        try:
            for line in child.stdout:
                # Is it a function line like:
                #   000177b0 <android::IBinder::~IBinder()>:
                components = func_regexp.match(line)
                if components:
                    # This is a new function, so record the current function
                    # and its address.
                    current_symbol_addr = int(components.group(1), 16)
                    current_symbol = components.group(2)

                    # Does it have an optional offset like: "foo(..)+0x2c"?
                    components = offset_regexp.match(current_symbol)
                    if components:
                        current_symbol = components.group(1)
                        offset = components.group(2)
                        if offset:
                            current_symbol_addr -= int(offset, 16)

                # Is it an disassembly line like:
                #   177b2:  b510        push    {r4, lr}
                components = asm_regexp.match(line)
                if components:
                    i_addr = int(components.group(1), 16)
                    if i_addr == i_target:
                        result[target_addr] = (
                            current_symbol, i_target - current_symbol_addr)
        finally:
            child.stdout.close()
            # Reap the child so it does not linger as a zombie.
            child.wait()

    return result


def CallCppFilt(mangled_symbol):
    """Demangles a C++ symbol using the toolchain's c++filt.

    Args:
      mangled_symbol: the mangled symbol name.

    Returns:
      The first line of c++filt's output with surrounding whitespace removed.
    """
    cmd = [ToolPath("c++filt")]
    process = subprocess.Popen(cmd, stdin=subprocess.PIPE,
                               stdout=subprocess.PIPE, universal_newlines=True)
    # communicate() writes the input, closes stdin, reads the output and waits
    # for the process to exit.
    output = process.communicate(mangled_symbol + "\n")[0]
    return output.split("\n", 1)[0].strip()


def FormatSymbolWithOffset(symbol, offset):
    """Returns symbol, followed by "+<decimal offset>" if offset is non-zero."""
    if offset == 0:
        return symbol
    return "%s+%d" % (symbol, offset)