1#!/usr/bin/env python3 2# 3# Copyright (C) 2016 The Android Open Source Project 4# 5# Licensed under the Apache License, Version 2.0 (the "License"); 6# you may not use this file except in compliance with the License. 7# You may obtain a copy of the License at 8# 9# http://www.apache.org/licenses/LICENSE-2.0 10# 11# Unless required by applicable law or agreed to in writing, software 12# distributed under the License is distributed on an "AS IS" BASIS, 13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14# See the License for the specific language governing permissions and 15# limitations under the License. 16# 17 18"""utils.py: export utility functions. 19""" 20 21from __future__ import annotations 22import argparse 23import logging 24import os 25import os.path 26from pathlib import Path 27import re 28import shutil 29import subprocess 30import sys 31import time 32from typing import Dict, Iterator, List, Optional, Set, Union 33 34 35def get_script_dir() -> str: 36 return os.path.dirname(os.path.realpath(__file__)) 37 38 39def is_windows() -> bool: 40 return sys.platform == 'win32' or sys.platform == 'cygwin' 41 42 43def is_darwin() -> bool: 44 return sys.platform == 'darwin' 45 46 47def get_platform() -> str: 48 if is_windows(): 49 return 'windows' 50 if is_darwin(): 51 return 'darwin' 52 return 'linux' 53 54 55def is_python3() -> str: 56 return sys.version_info >= (3, 0) 57 58 59def log_debug(msg: str): 60 logging.debug(msg) 61 62 63def log_info(msg: str): 64 logging.info(msg) 65 66 67def log_warning(msg: str): 68 logging.warning(msg) 69 70 71def log_fatal(msg: str): 72 raise Exception(msg) 73 74 75def log_exit(msg: str): 76 sys.exit(msg) 77 78 79def disable_debug_log(): 80 logging.getLogger().setLevel(logging.WARN) 81 82 83def set_log_level(level_name: str): 84 if level_name == 'debug': 85 level = logging.DEBUG 86 elif level_name == 'info': 87 level = logging.INFO 88 elif level_name == 'warning': 89 level = logging.WARNING 90 else: 91 log_fatal('unknown log level: %s' % level_name) 92 logging.getLogger().setLevel(level) 93 94 95def str_to_bytes(str_value: str) -> bytes: 96 if not is_python3(): 97 return str_value 98 # In python 3, str are wide strings whereas the C api expects 8 bit strings, 99 # hence we have to convert. For now using utf-8 as the encoding. 100 return str_value.encode('utf-8') 101 102 103def bytes_to_str(bytes_value: Optional[bytes]) -> str: 104 if not bytes_value: 105 return '' 106 if not is_python3(): 107 return bytes_value 108 return bytes_value.decode('utf-8') 109 110 111def get_target_binary_path(arch: str, binary_name: str) -> str: 112 if arch == 'aarch64': 113 arch = 'arm64' 114 arch_dir = os.path.join(get_script_dir(), "bin", "android", arch) 115 if not os.path.isdir(arch_dir): 116 log_fatal("can't find arch directory: %s" % arch_dir) 117 binary_path = os.path.join(arch_dir, binary_name) 118 if not os.path.isfile(binary_path): 119 log_fatal("can't find binary: %s" % binary_path) 120 return binary_path 121 122 123def get_host_binary_path(binary_name: str) -> str: 124 dirname = os.path.join(get_script_dir(), 'bin') 125 if is_windows(): 126 if binary_name.endswith('.so'): 127 binary_name = binary_name[0:-3] + '.dll' 128 elif '.' not in binary_name: 129 binary_name += '.exe' 130 dirname = os.path.join(dirname, 'windows') 131 elif sys.platform == 'darwin': # OSX 132 if binary_name.endswith('.so'): 133 binary_name = binary_name[0:-3] + '.dylib' 134 dirname = os.path.join(dirname, 'darwin') 135 else: 136 dirname = os.path.join(dirname, 'linux') 137 dirname = os.path.join(dirname, 'x86_64' if sys.maxsize > 2 ** 32 else 'x86') 138 binary_path = os.path.join(dirname, binary_name) 139 if not os.path.isfile(binary_path): 140 log_fatal("can't find binary: %s" % binary_path) 141 return binary_path 142 143 144def is_executable_available(executable: str, option='--help') -> bool: 145 """ Run an executable to see if it exists. """ 146 try: 147 subproc = subprocess.Popen([executable, option], stdout=subprocess.PIPE, 148 stderr=subprocess.PIPE) 149 subproc.communicate() 150 return subproc.returncode == 0 151 except OSError: 152 return False 153 154 155class ToolFinder: 156 """ Find tools in ndk or sdk. """ 157 DEFAULT_SDK_PATH = { 158 'darwin': 'Library/Android/sdk', 159 'linux': 'Android/Sdk', 160 'windows': 'AppData/Local/Android/sdk', 161 } 162 163 EXPECTED_TOOLS = { 164 'adb': { 165 'is_binutils': False, 166 'test_option': 'version', 167 'path_in_sdk': 'platform-tools/adb', 168 }, 169 'llvm-objdump': { 170 'is_binutils': False, 171 'path_in_ndk': 172 lambda platform: 'toolchains/llvm/prebuilt/%s-x86_64/bin/llvm-objdump' % platform, 173 }, 174 'llvm-readelf': { 175 'is_binutils': False, 176 'path_in_ndk': 177 lambda platform: 'toolchains/llvm/prebuilt/%s-x86_64/bin/llvm-readelf' % platform, 178 }, 179 'llvm-symbolizer': { 180 'is_binutils': False, 181 'path_in_ndk': 182 lambda platform: 'toolchains/llvm/prebuilt/%s-x86_64/bin/llvm-symbolizer' % platform, 183 }, 184 'objdump': { 185 'is_binutils': True, 186 }, 187 'strip': { 188 'is_binutils': True, 189 }, 190 } 191 192 @classmethod 193 def find_ndk_and_sdk_paths(cls, ndk_path: Optional[str] = None 194 ) -> Iterator[Tuple[Optional[str], Optional[str]]]: 195 # Use the given ndk path. 196 if ndk_path and os.path.isdir(ndk_path): 197 ndk_path = os.path.abspath(ndk_path) 198 yield ndk_path, cls.find_sdk_path(ndk_path) 199 # Find ndk in the parent directory containing simpleperf scripts. 200 ndk_path = os.path.dirname(os.path.abspath(get_script_dir())) 201 yield ndk_path, cls.find_sdk_path(ndk_path) 202 # Find ndk in the default sdk installation path. 203 if is_windows(): 204 home = os.environ.get('HOMEDRIVE') + os.environ.get('HOMEPATH') 205 else: 206 home = os.environ.get('HOME') 207 if home: 208 platform = get_platform() 209 sdk_path = os.path.join(home, cls.DEFAULT_SDK_PATH[platform].replace('/', os.sep)) 210 if os.path.isdir(sdk_path): 211 path = os.path.join(sdk_path, 'ndk') 212 if os.path.isdir(path): 213 # Android Studio can install multiple ndk versions in 'ndk'. 214 # Find the newest one. 215 ndk_version = None 216 for name in os.listdir(path): 217 if not ndk_version or ndk_version < name: 218 ndk_version = name 219 if ndk_version: 220 yield os.path.join(path, ndk_version), sdk_path 221 ndk_path = os.path.join(sdk_path, 'ndk-bundle') 222 if os.path.isdir(ndk_path): 223 yield ndk_path, sdk_path 224 225 @classmethod 226 def find_sdk_path(cls, ndk_path: str) -> Optional[str]: 227 path = ndk_path 228 for _ in range(2): 229 path = os.path.dirname(path) 230 if os.path.isdir(os.path.join(path, 'platform-tools')): 231 return path 232 return None 233 234 @classmethod 235 def _get_binutils_path_in_ndk(cls, toolname: str, arch: Optional[str], platform: str 236 ) -> Tuple[str, str]: 237 if not arch: 238 arch = 'arm64' 239 if arch == 'arm64': 240 name = 'aarch64-linux-android-' + toolname 241 elif arch == 'arm': 242 name = 'arm-linux-androideabi-' + toolname 243 elif arch == 'x86_64': 244 name = 'x86_64-linux-android-' + toolname 245 elif arch == 'x86': 246 name = 'i686-linux-android-' + toolname 247 else: 248 log_fatal('unexpected arch %s' % arch) 249 path = 'toolchains/llvm/prebuilt/%s-x86_64/bin/%s' % (platform, name) 250 return (name, path) 251 252 @classmethod 253 def find_tool_path(cls, toolname: str, ndk_path: Optional[str] = None, 254 arch: Optional[str] = None) -> Optional[str]: 255 tool_info = cls.EXPECTED_TOOLS.get(toolname) 256 if not tool_info: 257 return None 258 259 is_binutils = tool_info['is_binutils'] 260 test_option = tool_info.get('test_option', '--help') 261 platform = get_platform() 262 263 # Find tool in clang prebuilts in Android platform. 264 if toolname.startswith('llvm-') and platform == 'linux' and get_script_dir().endswith( 265 'system/extras/simpleperf/scripts'): 266 path = str( 267 Path(get_script_dir()).parents[3] / 'prebuilts' / 'clang' / 'host' / 'linux-x86' / 268 'llvm-binutils-stable' / toolname) 269 if is_executable_available(path, test_option): 270 return path 271 272 # Find tool in NDK or SDK. 273 path_in_ndk = None 274 path_in_sdk = None 275 if is_binutils: 276 toolname_with_arch, path_in_ndk = cls._get_binutils_path_in_ndk( 277 toolname, arch, platform) 278 else: 279 toolname_with_arch = toolname 280 if 'path_in_ndk' in tool_info: 281 path_in_ndk = tool_info['path_in_ndk'](platform) 282 elif 'path_in_sdk' in tool_info: 283 path_in_sdk = tool_info['path_in_sdk'] 284 if path_in_ndk: 285 path_in_ndk = path_in_ndk.replace('/', os.sep) 286 elif path_in_sdk: 287 path_in_sdk = path_in_sdk.replace('/', os.sep) 288 289 for ndk_dir, sdk_dir in cls.find_ndk_and_sdk_paths(ndk_path): 290 if path_in_ndk and ndk_dir: 291 path = os.path.join(ndk_dir, path_in_ndk) 292 if is_executable_available(path, test_option): 293 return path 294 elif path_in_sdk and sdk_dir: 295 path = os.path.join(sdk_dir, path_in_sdk) 296 if is_executable_available(path, test_option): 297 return path 298 299 # Find tool in $PATH. 300 if is_executable_available(toolname_with_arch, test_option): 301 return toolname_with_arch 302 303 # Find tool without arch in $PATH. 304 if is_binutils and tool_info.get('accept_tool_without_arch'): 305 if is_executable_available(toolname, test_option): 306 return toolname 307 return None 308 309 310class AdbHelper(object): 311 def __init__(self, enable_switch_to_root: bool = True): 312 adb_path = ToolFinder.find_tool_path('adb') 313 if not adb_path: 314 log_exit("Can't find adb in PATH environment.") 315 self.adb_path: str = adb_path 316 self.enable_switch_to_root = enable_switch_to_root 317 self.serial_number: Optional[str] = None 318 319 def is_device_available(self) -> bool: 320 return self.run_and_return_output(['shell', 'whoami'])[0] 321 322 def run(self, adb_args: List[str], log_output: bool = False, log_stderr: bool = False) -> bool: 323 return self.run_and_return_output(adb_args, log_output, log_stderr)[0] 324 325 def run_and_return_output(self, adb_args: List[str], log_output: bool = False, 326 log_stderr: bool = False) -> Tuple[bool, str]: 327 adb_args = [self.adb_path] + adb_args 328 log_debug('run adb cmd: %s' % adb_args) 329 env = None 330 if self.serial_number: 331 env = os.environ.copy() 332 env['ANDROID_SERIAL'] = self.serial_number 333 subproc = subprocess.Popen( 334 adb_args, env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE) 335 stdout_data, stderr_data = subproc.communicate() 336 stdout_data = bytes_to_str(stdout_data) 337 stderr_data = bytes_to_str(stderr_data) 338 returncode = subproc.returncode 339 result = (returncode == 0) 340 if log_output and stdout_data: 341 log_debug(stdout_data) 342 if log_stderr and stderr_data: 343 log_warning(stderr_data) 344 log_debug('run adb cmd: %s [result %s]' % (adb_args, result)) 345 return (result, stdout_data) 346 347 def check_run(self, adb_args: List[str], log_output: bool = False): 348 self.check_run_and_return_output(adb_args, log_output) 349 350 def check_run_and_return_output(self, adb_args: List[str], log_output: bool = False, 351 log_stderr: bool = False) -> str: 352 result, stdoutdata = self.run_and_return_output(adb_args, log_output, True) 353 if not result: 354 log_exit('run "adb %s" failed: %s' % (adb_args, stdoutdata)) 355 return stdoutdata 356 357 def _unroot(self): 358 result, stdoutdata = self.run_and_return_output(['shell', 'whoami']) 359 if not result: 360 return 361 if 'root' not in stdoutdata: 362 return 363 log_info('unroot adb') 364 self.run(['unroot']) 365 self.run(['wait-for-device']) 366 time.sleep(1) 367 368 def switch_to_root(self) -> bool: 369 if not self.enable_switch_to_root: 370 self._unroot() 371 return False 372 result, stdoutdata = self.run_and_return_output(['shell', 'whoami']) 373 if not result: 374 return False 375 if 'root' in stdoutdata: 376 return True 377 build_type = self.get_property('ro.build.type') 378 if build_type == 'user': 379 return False 380 self.run(['root']) 381 time.sleep(1) 382 self.run(['wait-for-device']) 383 result, stdoutdata = self.run_and_return_output(['shell', 'whoami']) 384 return result and 'root' in stdoutdata 385 386 def get_property(self, name: str) -> Optional[str]: 387 result, stdoutdata = self.run_and_return_output(['shell', 'getprop', name]) 388 return stdoutdata if result else None 389 390 def set_property(self, name: str, value: str) -> bool: 391 return self.run(['shell', 'setprop', name, value]) 392 393 def get_device_arch(self) -> str: 394 output = self.check_run_and_return_output(['shell', 'uname', '-m']) 395 if 'aarch64' in output: 396 return 'arm64' 397 if 'arm' in output: 398 return 'arm' 399 if 'x86_64' in output: 400 return 'x86_64' 401 if '86' in output: 402 return 'x86' 403 log_fatal('unsupported architecture: %s' % output.strip()) 404 return '' 405 406 def get_android_version(self) -> int: 407 """ Get Android version on device, like 7 is for Android N, 8 is for Android O.""" 408 build_version = self.get_property('ro.build.version.release') 409 android_version = 0 410 if build_version: 411 if not build_version[0].isdigit(): 412 c = build_version[0].upper() 413 if c.isupper() and c >= 'L': 414 android_version = ord(c) - ord('L') + 5 415 else: 416 strs = build_version.split('.') 417 if strs: 418 android_version = int(strs[0]) 419 return android_version 420 421 422def flatten_arg_list(arg_list: List[List[str]]) -> List[str]: 423 res = [] 424 if arg_list: 425 for items in arg_list: 426 res += items 427 return res 428 429 430def remove(dir_or_file: Union[Path, str]): 431 if os.path.isfile(dir_or_file): 432 os.remove(dir_or_file) 433 elif os.path.isdir(dir_or_file): 434 shutil.rmtree(dir_or_file, ignore_errors=True) 435 436 437def open_report_in_browser(report_path: str): 438 if is_darwin(): 439 # On darwin 10.12.6, webbrowser can't open browser, so try `open` cmd first. 440 try: 441 subprocess.check_call(['open', report_path]) 442 return 443 except subprocess.CalledProcessError: 444 pass 445 import webbrowser 446 try: 447 # Try to open the report with Chrome 448 browser = webbrowser.get('google-chrome') 449 browser.open(report_path, new=0, autoraise=True) 450 except webbrowser.Error: 451 # webbrowser.get() doesn't work well on darwin/windows. 452 webbrowser.open_new_tab(report_path) 453 454 455class BinaryFinder: 456 def __init__(self, binary_cache_dir: Optional[Union[Path, str]], readelf: ReadElf): 457 if isinstance(binary_cache_dir, str): 458 binary_cache_dir = Path(binary_cache_dir) 459 self.binary_cache_dir = binary_cache_dir 460 self.readelf = readelf 461 self.build_id_map = self._load_build_id_map() 462 463 def _load_build_id_map(self) -> Dict[str, Path]: 464 build_id_map: Dict[str, Path] = {} 465 if self.binary_cache_dir: 466 build_id_list_file = self.binary_cache_dir / 'build_id_list' 467 if build_id_list_file.is_file(): 468 with open(self.binary_cache_dir / 'build_id_list', 'rb') as fh: 469 for line in fh.readlines(): 470 # lines are in format "<build_id>=<path_in_binary_cache>". 471 items = bytes_to_str(line).strip().split('=') 472 if len(items) == 2: 473 build_id_map[items[0]] = self.binary_cache_dir / items[1] 474 return build_id_map 475 476 def find_binary(self, dso_path_in_record_file: str, 477 expected_build_id: Optional[str]) -> Optional[Path]: 478 """ If expected_build_id is None, don't check build id. 479 Otherwise, the build id of the found binary should match the expected one.""" 480 # Find binary from build id map. 481 if expected_build_id: 482 path = self.build_id_map.get(expected_build_id) 483 if path and self._check_path(path, expected_build_id): 484 return path 485 # Find binary by path in binary cache. 486 if self.binary_cache_dir: 487 path = self.binary_cache_dir / dso_path_in_record_file[1:] 488 if self._check_path(path, expected_build_id): 489 return path 490 # Find binary by its absolute path. 491 path = Path(dso_path_in_record_file) 492 if self._check_path(path, expected_build_id): 493 return path 494 return None 495 496 def _check_path(self, path: Path, expected_build_id: Optional[str]) -> bool: 497 if not self.readelf.is_elf_file(path): 498 return False 499 if expected_build_id is not None: 500 return self.readelf.get_build_id(path) == expected_build_id 501 return True 502 503 504class Addr2Nearestline(object): 505 """ Use llvm-symbolizer to convert (dso_path, func_addr, addr) to (source_file, line). 506 For instructions generated by C++ compilers without a matching statement in source code 507 (like stack corruption check, switch optimization, etc.), addr2line can't generate 508 line information. However, we want to assign the instruction to the nearest line before 509 the instruction (just like objdump -dl). So we use below strategy: 510 Instead of finding the exact line of the instruction in an address, we find the nearest 511 line to the instruction in an address. If an address doesn't have a line info, we find 512 the line info of address - 1. If still no line info, then use address - 2, address - 3, 513 etc. 514 515 The implementation steps are as below: 516 1. Collect all (dso_path, func_addr, addr) requests before converting. This saves the 517 times to call addr2line. 518 2. Convert addrs to (source_file, line) pairs for each dso_path as below: 519 2.1 Check if the dso_path has .debug_line. If not, omit its conversion. 520 2.2 Get arch of the dso_path, and decide the addr_step for it. addr_step is the step we 521 change addr each time. For example, since instructions of arm64 are all 4 bytes long, 522 addr_step for arm64 can be 4. 523 2.3 Use addr2line to find line info for each addr in the dso_path. 524 2.4 For each addr without line info, use addr2line to find line info for 525 range(addr - addr_step, addr - addr_step * 4 - 1, -addr_step). 526 2.5 For each addr without line info, use addr2line to find line info for 527 range(addr - addr_step * 5, addr - addr_step * 128 - 1, -addr_step). 528 (128 is a guess number. A nested switch statement in 529 system/core/demangle/Demangler.cpp has >300 bytes without line info in arm64.) 530 """ 531 class Dso(object): 532 """ Info of a dynamic shared library. 533 addrs: a map from address to Addr object in this dso. 534 """ 535 536 def __init__(self, build_id: Optional[str]): 537 self.build_id = build_id 538 self.addrs: Dict[int, Addr2Nearestline.Addr] = {} 539 540 class Addr(object): 541 """ Info of an addr request. 542 func_addr: start_addr of the function containing addr. 543 source_lines: a list of [file_id, line_number] for addr. 544 source_lines[:-1] are all for inlined functions. 545 """ 546 547 def __init__(self, func_addr: int): 548 self.func_addr = func_addr 549 self.source_lines: Optional[List[int, int]] = None 550 551 def __init__( 552 self, ndk_path: Optional[str], 553 binary_finder: BinaryFinder, with_function_name: bool): 554 self.symbolizer_path = ToolFinder.find_tool_path('llvm-symbolizer', ndk_path) 555 if not self.symbolizer_path: 556 log_exit("Can't find llvm-symbolizer. Please set ndk path with --ndk_path option.") 557 self.readelf = ReadElf(ndk_path) 558 self.dso_map: Dict[str, Addr2Nearestline.Dso] = {} # map from dso_path to Dso. 559 self.binary_finder = binary_finder 560 self.with_function_name = with_function_name 561 # Saving file names for each addr takes a lot of memory. So we store file ids in Addr, 562 # and provide data structures connecting file id and file name here. 563 self.file_name_to_id: Dict[str, int] = {} 564 self.file_id_to_name: List[str] = [] 565 self.func_name_to_id: Dict[str, int] = {} 566 self.func_id_to_name: List[str] = [] 567 568 def add_addr(self, dso_path: str, build_id: Optional[str], func_addr: int, addr: int): 569 dso = self.dso_map.get(dso_path) 570 if dso is None: 571 dso = self.dso_map[dso_path] = self.Dso(build_id) 572 if addr not in dso.addrs: 573 dso.addrs[addr] = self.Addr(func_addr) 574 575 def convert_addrs_to_lines(self): 576 for dso_path, dso in self.dso_map.items(): 577 self._convert_addrs_in_one_dso(dso_path, dso) 578 579 def _convert_addrs_in_one_dso(self, dso_path: str, dso: Addr2Nearestline.Dso): 580 real_path = self.binary_finder.find_binary(dso_path, dso.build_id) 581 if not real_path: 582 if dso_path not in ['//anon', 'unknown', '[kernel.kallsyms]']: 583 log_debug("Can't find dso %s" % dso_path) 584 return 585 586 if not self._check_debug_line_section(real_path): 587 log_debug("file %s doesn't contain .debug_line section." % real_path) 588 return 589 590 addr_step = self._get_addr_step(real_path) 591 self._collect_line_info(dso, real_path, [0]) 592 self._collect_line_info(dso, real_path, range(-addr_step, -addr_step * 4 - 1, -addr_step)) 593 self._collect_line_info(dso, real_path, 594 range(-addr_step * 5, -addr_step * 128 - 1, -addr_step)) 595 596 def _check_debug_line_section(self, real_path: Path) -> bool: 597 return '.debug_line' in self.readelf.get_sections(real_path) 598 599 def _get_addr_step(self, real_path: Path) -> int: 600 arch = self.readelf.get_arch(real_path) 601 if arch == 'arm64': 602 return 4 603 if arch == 'arm': 604 return 2 605 return 1 606 607 def _collect_line_info( 608 self, dso: Addr2Nearestline.Dso, real_path: Path, addr_shifts: List[int]): 609 """ Use addr2line to get line info in a dso, with given addr shifts. """ 610 # 1. Collect addrs to send to addr2line. 611 addr_set: Set[int] = set() 612 for addr in dso.addrs: 613 addr_obj = dso.addrs[addr] 614 if addr_obj.source_lines: # already has source line, no need to search. 615 continue 616 for shift in addr_shifts: 617 # The addr after shift shouldn't change to another function. 618 shifted_addr = max(addr + shift, addr_obj.func_addr) 619 addr_set.add(shifted_addr) 620 if shifted_addr == addr_obj.func_addr: 621 break 622 if not addr_set: 623 return 624 addr_request = '\n'.join(['0x%x' % addr for addr in sorted(addr_set)]) 625 626 # 2. Use addr2line to collect line info. 627 try: 628 subproc = subprocess.Popen(self._build_symbolizer_args(real_path), 629 stdin=subprocess.PIPE, stdout=subprocess.PIPE) 630 (stdoutdata, _) = subproc.communicate(str_to_bytes(addr_request)) 631 stdoutdata = bytes_to_str(stdoutdata) 632 except OSError: 633 return 634 addr_map: Dict[int, List[Tuple[int]]] = {} 635 cur_line_list: Optional[List[Tuple[int]]] = None 636 need_function_name = self.with_function_name 637 cur_function_name: Optional[str] = None 638 for line in stdoutdata.strip().split('\n'): 639 line = line.strip() 640 if not line: 641 continue 642 if line[:2] == '0x': 643 # a new address 644 cur_line_list = addr_map[int(line, 16)] = [] 645 elif need_function_name: 646 cur_function_name = line.strip() 647 need_function_name = False 648 else: 649 need_function_name = self.with_function_name 650 if cur_line_list is None: 651 continue 652 file_path, line_number = self._parse_source_location(line) 653 if not file_path or not line_number: 654 # An addr can have a list of (file, line), when the addr belongs to an inlined 655 # function. Sometimes only part of the list has ? mark. In this case, we think 656 # the line info is valid if the first line doesn't have ? mark. 657 if not cur_line_list: 658 cur_line_list = None 659 continue 660 file_id = self._get_file_id(file_path) 661 if self.with_function_name: 662 func_id = self._get_func_id(cur_function_name) 663 cur_line_list.append((file_id, line_number, func_id)) 664 else: 665 cur_line_list.append((file_id, line_number)) 666 667 # 3. Fill line info in dso.addrs. 668 for addr in dso.addrs: 669 addr_obj = dso.addrs[addr] 670 if addr_obj.source_lines: 671 continue 672 for shift in addr_shifts: 673 shifted_addr = max(addr + shift, addr_obj.func_addr) 674 lines = addr_map.get(shifted_addr) 675 if lines: 676 addr_obj.source_lines = lines 677 break 678 if shifted_addr == addr_obj.func_addr: 679 break 680 681 def _build_symbolizer_args(self, binary_path: Path) -> List[str]: 682 args = [self.symbolizer_path, '--print-address', '--inlining', '--obj=%s' % binary_path] 683 if self.with_function_name: 684 args += ['--functions=linkage', '--demangle'] 685 else: 686 args.append('--functions=none') 687 return args 688 689 def _parse_source_location(self, line: str) -> Tuple[Optional[str], Optional[int]]: 690 file_path, line_number = None, None 691 # Handle lines in format filename:line:column, like "runtest/two_functions.cpp:14:25". 692 # Filename may contain ':' like "C:\Users\...\file". 693 items = line.rsplit(':', 2) 694 if len(items) == 3: 695 file_path, line_number = items[:2] 696 if not file_path or ('?' in file_path) or not line_number or ('?' in line_number): 697 return None, None 698 try: 699 line_number = int(line_number) 700 except ValueError: 701 return None, None 702 return file_path, line_number 703 704 def _get_file_id(self, file_path: str) -> int: 705 file_id = self.file_name_to_id.get(file_path) 706 if file_id is None: 707 file_id = self.file_name_to_id[file_path] = len(self.file_id_to_name) 708 self.file_id_to_name.append(file_path) 709 return file_id 710 711 def _get_func_id(self, func_name: str) -> int: 712 func_id = self.func_name_to_id.get(func_name) 713 if func_id is None: 714 func_id = self.func_name_to_id[func_name] = len(self.func_id_to_name) 715 self.func_id_to_name.append(func_name) 716 return func_id 717 718 def get_dso(self, dso_path: str) -> Addr2Nearestline.Dso: 719 return self.dso_map.get(dso_path) 720 721 def get_addr_source(self, dso: Addr2Nearestline.Dso, addr: int) -> Optional[List[Tuple[int]]]: 722 source = dso.addrs[addr].source_lines 723 if source is None: 724 return None 725 if self.with_function_name: 726 return [(self.file_id_to_name[file_id], line, self.func_id_to_name[func_id]) 727 for (file_id, line, func_id) in source] 728 return [(self.file_id_to_name[file_id], line) for (file_id, line) in source] 729 730 731class SourceFileSearcher(object): 732 """ Find source file paths in the file system. 733 The file paths reported by addr2line are the paths stored in debug sections 734 of shared libraries. And we need to convert them to file paths in the file 735 system. It is done in below steps: 736 1. Collect all file paths under the provided source_dirs. The suffix of a 737 source file should contain one of below: 738 h: for C/C++ header files. 739 c: for C/C++ source files. 740 java: for Java source files. 741 kt: for Kotlin source files. 742 2. Given an abstract_path reported by addr2line, select the best real path 743 as below: 744 2.1 Find all real paths with the same file name as the abstract path. 745 2.2 Select the real path having the longest common suffix with the abstract path. 746 """ 747 748 SOURCE_FILE_EXTS = {'.h', '.hh', '.H', '.hxx', '.hpp', '.h++', 749 '.c', '.cc', '.C', '.cxx', '.cpp', '.c++', 750 '.java', '.kt'} 751 752 @classmethod 753 def is_source_filename(cls, filename: str) -> bool: 754 ext = os.path.splitext(filename)[1] 755 return ext in cls.SOURCE_FILE_EXTS 756 757 def __init__(self, source_dirs: List[str]): 758 # Map from filename to a list of reversed directory path containing filename. 759 self.filename_to_rparents: Dict[str, List[str]] = {} 760 self._collect_paths(source_dirs) 761 762 def _collect_paths(self, source_dirs: List[str]): 763 for source_dir in source_dirs: 764 for parent, _, file_names in os.walk(source_dir): 765 rparent = None 766 for file_name in file_names: 767 if self.is_source_filename(file_name): 768 rparents = self.filename_to_rparents.get(file_name) 769 if rparents is None: 770 rparents = self.filename_to_rparents[file_name] = [] 771 if rparent is None: 772 rparent = parent[::-1] 773 rparents.append(rparent) 774 775 def get_real_path(self, abstract_path: str) -> Optional[str]: 776 abstract_path = abstract_path.replace('/', os.sep) 777 abstract_parent, file_name = os.path.split(abstract_path) 778 abstract_rparent = abstract_parent[::-1] 779 real_rparents = self.filename_to_rparents.get(file_name) 780 if real_rparents is None: 781 return None 782 best_matched_rparent = None 783 best_common_length = -1 784 for real_rparent in real_rparents: 785 length = len(os.path.commonprefix((real_rparent, abstract_rparent))) 786 if length > best_common_length: 787 best_common_length = length 788 best_matched_rparent = real_rparent 789 if best_matched_rparent is None: 790 return None 791 return os.path.join(best_matched_rparent[::-1], file_name) 792 793 794class Objdump(object): 795 """ A wrapper of objdump to disassemble code. """ 796 797 def __init__(self, ndk_path: Optional[str], binary_finder: BinaryFinder): 798 self.ndk_path = ndk_path 799 self.binary_finder = binary_finder 800 self.readelf = ReadElf(ndk_path) 801 self.objdump_paths: Dict[str, str] = {} 802 803 def get_dso_info(self, dso_path: str, expected_build_id: Optional[str] 804 ) -> Optional[Tuple[str, str]]: 805 real_path = self.binary_finder.find_binary(dso_path, expected_build_id) 806 if not real_path: 807 return None 808 arch = self.readelf.get_arch(real_path) 809 if arch == 'unknown': 810 return None 811 return (str(real_path), arch) 812 813 def disassemble_code(self, dso_info, start_addr, addr_len) -> List[Tuple[str, int]]: 814 """ Disassemble [start_addr, start_addr + addr_len] of dso_path. 815 Return a list of pair (disassemble_code_line, addr). 816 """ 817 real_path, arch = dso_info 818 objdump_path = self.objdump_paths.get(arch) 819 if not objdump_path: 820 if arch == 'arm': 821 # llvm-objdump for arm is not good at showing branch targets. 822 # So still prefer objdump. 823 objdump_path = ToolFinder.find_tool_path('objdump', self.ndk_path, arch) 824 if not objdump_path: 825 objdump_path = ToolFinder.find_tool_path('llvm-objdump', self.ndk_path, arch) 826 if not objdump_path: 827 log_exit("Can't find llvm-objdump. Please set ndk path with --ndk_path option.") 828 self.objdump_paths[arch] = objdump_path 829 830 # 3. Run objdump. 831 args = [objdump_path, '-dlC', '--no-show-raw-insn', 832 '--start-address=0x%x' % start_addr, 833 '--stop-address=0x%x' % (start_addr + addr_len), 834 real_path] 835 if arch == 'arm' and 'llvm-objdump' in objdump_path: 836 args += ['--print-imm-hex'] 837 try: 838 subproc = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE) 839 (stdoutdata, _) = subproc.communicate() 840 stdoutdata = bytes_to_str(stdoutdata) 841 except OSError: 842 return None 843 844 if not stdoutdata: 845 return None 846 result = [] 847 for line in stdoutdata.split('\n'): 848 line = line.rstrip() # Remove '\r' on Windows. 849 items = line.split(':', 1) 850 try: 851 addr = int(items[0], 16) 852 except ValueError: 853 addr = 0 854 result.append((line, addr)) 855 return result 856 857 858class ReadElf(object): 859 """ A wrapper of readelf. """ 860 861 def __init__(self, ndk_path: Optional[str]): 862 self.readelf_path = ToolFinder.find_tool_path('llvm-readelf', ndk_path) 863 if not self.readelf_path: 864 log_exit("Can't find llvm-readelf. Please set ndk path with --ndk_path option.") 865 866 @staticmethod 867 def is_elf_file(path: Union[Path, str]) -> bool: 868 if os.path.isfile(path): 869 with open(path, 'rb') as fh: 870 return fh.read(4) == b'\x7fELF' 871 return False 872 873 def get_arch(self, elf_file_path: Union[Path, str]) -> str: 874 """ Get arch of an elf file. """ 875 if self.is_elf_file(elf_file_path): 876 try: 877 output = subprocess.check_output([self.readelf_path, '-h', str(elf_file_path)]) 878 output = bytes_to_str(output) 879 if output.find('AArch64') != -1: 880 return 'arm64' 881 if output.find('ARM') != -1: 882 return 'arm' 883 if output.find('X86-64') != -1: 884 return 'x86_64' 885 if output.find('80386') != -1: 886 return 'x86' 887 except subprocess.CalledProcessError: 888 pass 889 return 'unknown' 890 891 def get_build_id(self, elf_file_path: Union[Path, str], with_padding=True) -> str: 892 """ Get build id of an elf file. """ 893 if self.is_elf_file(elf_file_path): 894 try: 895 output = subprocess.check_output([self.readelf_path, '-n', str(elf_file_path)]) 896 output = bytes_to_str(output) 897 result = re.search(r'Build ID:\s*(\S+)', output) 898 if result: 899 build_id = result.group(1) 900 if with_padding: 901 build_id = self.pad_build_id(build_id) 902 return build_id 903 except subprocess.CalledProcessError: 904 pass 905 return "" 906 907 @staticmethod 908 def pad_build_id(build_id: str) -> str: 909 """ Pad build id to 40 hex numbers (20 bytes). """ 910 if len(build_id) < 40: 911 build_id += '0' * (40 - len(build_id)) 912 else: 913 build_id = build_id[:40] 914 return '0x' + build_id 915 916 def get_sections(self, elf_file_path: Union[Path, str]) -> List[str]: 917 """ Get sections of an elf file. """ 918 section_names: List[str] = [] 919 if self.is_elf_file(elf_file_path): 920 try: 921 output = subprocess.check_output([self.readelf_path, '-SW', str(elf_file_path)]) 922 output = bytes_to_str(output) 923 for line in output.split('\n'): 924 # Parse line like:" [ 1] .note.android.ident NOTE 0000000000400190 ...". 925 result = re.search(r'^\s+\[\s*\d+\]\s(.+?)\s', line) 926 if result: 927 section_name = result.group(1).strip() 928 if section_name: 929 section_names.append(section_name) 930 except subprocess.CalledProcessError: 931 pass 932 return section_names 933 934 935def extant_dir(arg: str) -> str: 936 """ArgumentParser type that only accepts extant directories. 937 938 Args: 939 arg: The string argument given on the command line. 940 Returns: The argument as a realpath. 941 Raises: 942 argparse.ArgumentTypeError: The given path isn't a directory. 943 """ 944 path = os.path.realpath(arg) 945 if not os.path.isdir(path): 946 raise argparse.ArgumentTypeError('{} is not a directory.'.format(path)) 947 return path 948 949 950def extant_file(arg: str) -> str: 951 """ArgumentParser type that only accepts extant files. 952 953 Args: 954 arg: The string argument given on the command line. 955 Returns: The argument as a realpath. 956 Raises: 957 argparse.ArgumentTypeError: The given path isn't a file. 958 """ 959 path = os.path.realpath(arg) 960 if not os.path.isfile(path): 961 raise argparse.ArgumentTypeError('{} is not a file.'.format(path)) 962 return path 963 964 965class ArgParseFormatter( 966 argparse.ArgumentDefaultsHelpFormatter, argparse.RawDescriptionHelpFormatter): 967 pass 968 969 970logging.getLogger().setLevel(logging.DEBUG) 971