1#!/usr/bin/env python3 2# 3# Copyright (C) 2016 The Android Open Source Project 4# 5# Licensed under the Apache License, Version 2.0 (the "License"); 6# you may not use this file except in compliance with the License. 7# You may obtain a copy of the License at 8# 9# http://www.apache.org/licenses/LICENSE-2.0 10# 11# Unless required by applicable law or agreed to in writing, software 12# distributed under the License is distributed on an "AS IS" BASIS, 13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14# See the License for the specific language governing permissions and 15# limitations under the License. 16# 17 18"""utils.py: export utility functions. 19""" 20 21from __future__ import annotations 22import argparse 23from concurrent.futures import Future, ThreadPoolExecutor 24from dataclasses import dataclass 25import logging 26import os 27import os.path 28from pathlib import Path 29import re 30import shutil 31import subprocess 32import sys 33import time 34from typing import Any, Dict, Iterator, List, Optional, Set, Tuple, Union 35 36 37NDK_ERROR_MESSAGE = "Please install the Android NDK (https://developer.android.com/studio/projects/install-ndk), then set NDK path with --ndk_path option." 38 39 40def get_script_dir() -> str: 41 return os.path.dirname(os.path.realpath(__file__)) 42 43 44def is_windows() -> bool: 45 return sys.platform == 'win32' or sys.platform == 'cygwin' 46 47 48def is_darwin() -> bool: 49 return sys.platform == 'darwin' 50 51 52def get_platform() -> str: 53 if is_windows(): 54 return 'windows' 55 if is_darwin(): 56 return 'darwin' 57 return 'linux' 58 59 60def str_to_bytes(str_value: str) -> bytes: 61 # In python 3, str are wide strings whereas the C api expects 8 bit strings, 62 # hence we have to convert. For now using utf-8 as the encoding. 
63 return str_value.encode('utf-8') 64 65 66def bytes_to_str(bytes_value: Optional[bytes]) -> str: 67 if not bytes_value: 68 return '' 69 return bytes_value.decode('utf-8') 70 71 72def get_target_binary_path(arch: str, binary_name: str) -> str: 73 if arch == 'aarch64': 74 arch = 'arm64' 75 arch_dir = os.path.join(get_script_dir(), "bin", "android", arch) 76 if not os.path.isdir(arch_dir): 77 log_fatal("can't find arch directory: %s" % arch_dir) 78 binary_path = os.path.join(arch_dir, binary_name) 79 if not os.path.isfile(binary_path): 80 log_fatal("can't find binary: %s" % binary_path) 81 return binary_path 82 83 84def get_host_binary_path(binary_name: str) -> str: 85 dirname = os.path.join(get_script_dir(), 'bin') 86 if is_windows(): 87 if binary_name.endswith('.so'): 88 binary_name = binary_name[0:-3] + '.dll' 89 elif '.' not in binary_name: 90 binary_name += '.exe' 91 dirname = os.path.join(dirname, 'windows') 92 elif sys.platform == 'darwin': # OSX 93 if binary_name.endswith('.so'): 94 binary_name = binary_name[0:-3] + '.dylib' 95 dirname = os.path.join(dirname, 'darwin') 96 else: 97 dirname = os.path.join(dirname, 'linux') 98 dirname = os.path.join(dirname, 'x86_64' if sys.maxsize > 2 ** 32 else 'x86') 99 binary_path = os.path.join(dirname, binary_name) 100 if not os.path.isfile(binary_path): 101 log_fatal("can't find binary: %s" % binary_path) 102 return binary_path 103 104 105def is_executable_available(executable: str, option='--help') -> bool: 106 """ Run an executable to see if it exists. """ 107 try: 108 subproc = subprocess.Popen([executable, option], stdout=subprocess.PIPE, 109 stderr=subprocess.PIPE) 110 subproc.communicate() 111 return subproc.returncode == 0 112 except OSError: 113 return False 114 115 116class ToolFinder: 117 """ Find tools in ndk or sdk. 
""" 118 DEFAULT_SDK_PATH = { 119 'darwin': 'Library/Android/sdk', 120 'linux': 'Android/Sdk', 121 'windows': 'AppData/Local/Android/sdk', 122 } 123 124 EXPECTED_TOOLS = { 125 'adb': { 126 'is_binutils': False, 127 'test_option': 'version', 128 'path_in_sdk': 'platform-tools/adb', 129 }, 130 'llvm-objdump': { 131 'is_binutils': False, 132 'path_in_ndk': 133 lambda platform: 'toolchains/llvm/prebuilt/%s-x86_64/bin/llvm-objdump' % platform, 134 }, 135 'llvm-readelf': { 136 'is_binutils': False, 137 'path_in_ndk': 138 lambda platform: 'toolchains/llvm/prebuilt/%s-x86_64/bin/llvm-readelf' % platform, 139 }, 140 'llvm-symbolizer': { 141 'is_binutils': False, 142 'path_in_ndk': 143 lambda platform: 'toolchains/llvm/prebuilt/%s-x86_64/bin/llvm-symbolizer' % platform, 144 }, 145 'llvm-strip': { 146 'is_binutils': False, 147 'path_in_ndk': 148 lambda platform: 'toolchains/llvm/prebuilt/%s-x86_64/bin/llvm-strip' % platform, 149 }, 150 } 151 152 @classmethod 153 def find_ndk_and_sdk_paths(cls, ndk_path: Optional[str] = None 154 ) -> Iterator[Tuple[Optional[str], Optional[str]]]: 155 # Use the given ndk path. 156 if ndk_path and os.path.isdir(ndk_path): 157 ndk_path = os.path.abspath(ndk_path) 158 yield ndk_path, cls.find_sdk_path(ndk_path) 159 # Find ndk in the parent directory containing simpleperf scripts. 160 ndk_path = os.path.dirname(os.path.abspath(get_script_dir())) 161 yield ndk_path, cls.find_sdk_path(ndk_path) 162 # Find ndk in the default sdk installation path. 163 if is_windows(): 164 home = os.environ.get('HOMEDRIVE') + os.environ.get('HOMEPATH') 165 else: 166 home = os.environ.get('HOME') 167 if home: 168 platform = get_platform() 169 sdk_path = os.path.join(home, cls.DEFAULT_SDK_PATH[platform].replace('/', os.sep)) 170 if os.path.isdir(sdk_path): 171 path = os.path.join(sdk_path, 'ndk') 172 if os.path.isdir(path): 173 # Android Studio can install multiple ndk versions in 'ndk'. 174 # Find the newest one. 
175 ndk_version = None 176 for name in os.listdir(path): 177 if not ndk_version or ndk_version < name: 178 ndk_version = name 179 if ndk_version: 180 yield os.path.join(path, ndk_version), sdk_path 181 ndk_path = os.path.join(sdk_path, 'ndk-bundle') 182 if os.path.isdir(ndk_path): 183 yield ndk_path, sdk_path 184 185 @classmethod 186 def find_sdk_path(cls, ndk_path: str) -> Optional[str]: 187 path = ndk_path 188 for _ in range(2): 189 path = os.path.dirname(path) 190 if os.path.isdir(os.path.join(path, 'platform-tools')): 191 return path 192 return None 193 194 @classmethod 195 def _get_binutils_path_in_ndk(cls, toolname: str, arch: Optional[str], platform: str 196 ) -> Tuple[str, str]: 197 if not arch: 198 arch = 'arm64' 199 if arch == 'arm64': 200 name = 'aarch64-linux-android-' + toolname 201 elif arch == 'arm': 202 name = 'arm-linux-androideabi-' + toolname 203 elif arch == 'x86_64': 204 name = 'x86_64-linux-android-' + toolname 205 elif arch == 'x86': 206 name = 'i686-linux-android-' + toolname 207 else: 208 log_fatal('unexpected arch %s' % arch) 209 path = 'toolchains/llvm/prebuilt/%s-x86_64/bin/%s' % (platform, name) 210 return (name, path) 211 212 @classmethod 213 def find_tool_path(cls, toolname: str, ndk_path: Optional[str] = None, 214 arch: Optional[str] = None) -> Optional[str]: 215 tool_info = cls.EXPECTED_TOOLS.get(toolname) 216 if not tool_info: 217 return None 218 219 is_binutils = tool_info['is_binutils'] 220 test_option = tool_info.get('test_option', '--help') 221 platform = get_platform() 222 223 # Find tool in clang prebuilts in Android platform. 224 if toolname.startswith('llvm-') and platform == 'linux' and get_script_dir().endswith( 225 'system/extras/simpleperf/scripts'): 226 path = str( 227 Path(get_script_dir()).parents[3] / 'prebuilts' / 'clang' / 'host' / 'linux-x86' / 228 'llvm-binutils-stable' / toolname) 229 if is_executable_available(path, test_option): 230 return path 231 232 # Find tool in NDK or SDK. 
233 path_in_ndk = None 234 path_in_sdk = None 235 if is_binutils: 236 toolname_with_arch, path_in_ndk = cls._get_binutils_path_in_ndk( 237 toolname, arch, platform) 238 else: 239 toolname_with_arch = toolname 240 if 'path_in_ndk' in tool_info: 241 path_in_ndk = tool_info['path_in_ndk'](platform) 242 elif 'path_in_sdk' in tool_info: 243 path_in_sdk = tool_info['path_in_sdk'] 244 if path_in_ndk: 245 path_in_ndk = path_in_ndk.replace('/', os.sep) 246 elif path_in_sdk: 247 path_in_sdk = path_in_sdk.replace('/', os.sep) 248 249 for ndk_dir, sdk_dir in cls.find_ndk_and_sdk_paths(ndk_path): 250 if path_in_ndk and ndk_dir: 251 path = os.path.join(ndk_dir, path_in_ndk) 252 if is_executable_available(path, test_option): 253 return path 254 elif path_in_sdk and sdk_dir: 255 path = os.path.join(sdk_dir, path_in_sdk) 256 if is_executable_available(path, test_option): 257 return path 258 259 # Find tool in $PATH. 260 if is_executable_available(toolname_with_arch, test_option): 261 return toolname_with_arch 262 263 # Find tool without arch in $PATH. 
264 if is_binutils and tool_info.get('accept_tool_without_arch'): 265 if is_executable_available(toolname, test_option): 266 return toolname 267 return None 268 269 270class AdbHelper(object): 271 def __init__(self, enable_switch_to_root: bool = True): 272 adb_path = ToolFinder.find_tool_path('adb') 273 if not adb_path: 274 log_exit("Can't find adb in PATH environment.") 275 self.adb_path: str = adb_path 276 self.enable_switch_to_root = enable_switch_to_root 277 self.serial_number: Optional[str] = None 278 279 def is_device_available(self) -> bool: 280 return self.run_and_return_output(['shell', 'whoami'])[0] 281 282 def run(self, adb_args: List[str], log_output: bool = False, log_stderr: bool = False) -> bool: 283 return self.run_and_return_output(adb_args, log_output, log_stderr)[0] 284 285 def run_and_return_output(self, adb_args: List[str], log_output: bool = False, 286 log_stderr: bool = False) -> Tuple[bool, str]: 287 adb_args = [self.adb_path] + adb_args 288 logging.debug('run adb cmd: %s' % adb_args) 289 env = None 290 if self.serial_number: 291 env = os.environ.copy() 292 env['ANDROID_SERIAL'] = self.serial_number 293 subproc = subprocess.Popen( 294 adb_args, env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE) 295 stdout_data, stderr_data = subproc.communicate() 296 stdout_data = bytes_to_str(stdout_data) 297 stderr_data = bytes_to_str(stderr_data) 298 returncode = subproc.returncode 299 result = (returncode == 0) 300 if log_output and stdout_data: 301 logging.debug(stdout_data) 302 if log_stderr and stderr_data: 303 logging.warning(stderr_data) 304 logging.debug('run adb cmd: %s [result %s]' % (adb_args, result)) 305 return (result, stdout_data) 306 307 def check_run(self, adb_args: List[str], log_output: bool = False): 308 self.check_run_and_return_output(adb_args, log_output) 309 310 def check_run_and_return_output(self, adb_args: List[str], log_output: bool = False, 311 log_stderr: bool = False) -> str: 312 result, stdoutdata = 
self.run_and_return_output(adb_args, log_output, True) 313 if not result: 314 log_exit('run "adb %s" failed: %s' % (adb_args, stdoutdata)) 315 return stdoutdata 316 317 def _unroot(self): 318 result, stdoutdata = self.run_and_return_output(['shell', 'whoami']) 319 if not result: 320 return 321 if 'root' not in stdoutdata: 322 return 323 logging.info('unroot adb') 324 self.run(['unroot']) 325 time.sleep(1) 326 self.run(['wait-for-device']) 327 328 def switch_to_root(self) -> bool: 329 if not self.enable_switch_to_root: 330 self._unroot() 331 return False 332 result, stdoutdata = self.run_and_return_output(['shell', 'whoami']) 333 if not result: 334 return False 335 if 'root' in stdoutdata: 336 return True 337 build_type = self.get_property('ro.build.type') 338 if build_type == 'user': 339 return False 340 self.run(['root']) 341 time.sleep(1) 342 self.run(['wait-for-device']) 343 result, stdoutdata = self.run_and_return_output(['shell', 'whoami']) 344 return result and 'root' in stdoutdata 345 346 def get_property(self, name: str) -> Optional[str]: 347 result, stdoutdata = self.run_and_return_output(['shell', 'getprop', name]) 348 return stdoutdata.strip() if result else None 349 350 def set_property(self, name: str, value: str) -> bool: 351 return self.run(['shell', 'setprop', name, value]) 352 353 def get_device_arch(self) -> str: 354 output = self.check_run_and_return_output(['shell', 'uname', '-m']) 355 if 'aarch64' in output: 356 return 'arm64' 357 if 'arm' in output: 358 return 'arm' 359 if 'x86_64' in output: 360 return 'x86_64' 361 if '86' in output: 362 return 'x86' 363 log_fatal('unsupported architecture: %s' % output.strip()) 364 return '' 365 366 def get_android_version(self) -> int: 367 """ Get Android version on device, like 7 is for Android N, 8 is for Android O.""" 368 build_version = self.get_property('ro.build.version.codename') 369 if not build_version or build_version == 'REL': 370 build_version = self.get_property('ro.build.version.release') 371 
android_version = 0 372 if build_version: 373 if build_version[0].isdigit(): 374 i = 1 375 while i < len(build_version) and build_version[i].isdigit(): 376 i += 1 377 android_version = int(build_version[:i]) 378 else: 379 c = build_version[0].upper() 380 if c.isupper() and c >= 'L': 381 android_version = ord(c) - ord('L') + 5 382 return android_version 383 384 385def flatten_arg_list(arg_list: List[List[str]]) -> List[str]: 386 res = [] 387 if arg_list: 388 for items in arg_list: 389 res += items 390 return res 391 392 393def remove(dir_or_file: Union[Path, str]): 394 if os.path.isfile(dir_or_file): 395 os.remove(dir_or_file) 396 elif os.path.isdir(dir_or_file): 397 shutil.rmtree(dir_or_file, ignore_errors=True) 398 399 400def open_report_in_browser(report_path: str): 401 if is_darwin(): 402 # On darwin 10.12.6, webbrowser can't open browser, so try `open` cmd first. 403 try: 404 subprocess.check_call(['open', report_path]) 405 return 406 except subprocess.CalledProcessError: 407 pass 408 import webbrowser 409 try: 410 # Try to open the report with Chrome 411 browser = webbrowser.get('google-chrome') 412 browser.open(report_path, new=0, autoraise=True) 413 except webbrowser.Error: 414 # webbrowser.get() doesn't work well on darwin/windows. 
415 webbrowser.open_new_tab(report_path) 416 417 418class BinaryFinder: 419 def __init__(self, binary_cache_dir: Optional[Union[Path, str]], readelf: ReadElf): 420 if isinstance(binary_cache_dir, str): 421 binary_cache_dir = Path(binary_cache_dir) 422 self.binary_cache_dir = binary_cache_dir 423 self.readelf = readelf 424 self.build_id_map = self._load_build_id_map() 425 426 def _load_build_id_map(self) -> Dict[str, Path]: 427 build_id_map: Dict[str, Path] = {} 428 if self.binary_cache_dir: 429 build_id_list_file = self.binary_cache_dir / 'build_id_list' 430 if build_id_list_file.is_file(): 431 with open(self.binary_cache_dir / 'build_id_list', 'rb') as fh: 432 for line in fh.readlines(): 433 # lines are in format "<build_id>=<path_in_binary_cache>". 434 items = bytes_to_str(line).strip().split('=') 435 if len(items) == 2: 436 build_id_map[items[0]] = self.binary_cache_dir / items[1] 437 return build_id_map 438 439 def find_binary(self, dso_path_in_record_file: str, 440 expected_build_id: Optional[str]) -> Optional[Path]: 441 """ If expected_build_id is None, don't check build id. 442 Otherwise, the build id of the found binary should match the expected one.""" 443 # Find binary from build id map. 444 if expected_build_id: 445 path = self.build_id_map.get(expected_build_id) 446 if path and self._check_path(path, expected_build_id): 447 return path 448 # Find binary by path in binary cache. 449 if self.binary_cache_dir: 450 path = self.binary_cache_dir / dso_path_in_record_file[1:].replace('/', os.sep) 451 if self._check_path(path, expected_build_id): 452 return path 453 # Find binary by its absolute path. 
454 path = Path(dso_path_in_record_file) 455 if self._check_path(path, expected_build_id): 456 return path 457 return None 458 459 def _check_path(self, path: Path, expected_build_id: Optional[str]) -> bool: 460 if not self.readelf.is_elf_file(path): 461 return False 462 if expected_build_id is not None: 463 return self.readelf.get_build_id(path) == expected_build_id 464 return True 465 466 467class Addr2Nearestline(object): 468 """ Use llvm-symbolizer to convert (dso_path, func_addr, addr) to (source_file, line). 469 For instructions generated by C++ compilers without a matching statement in source code 470 (like stack corruption check, switch optimization, etc.), addr2line can't generate 471 line information. However, we want to assign the instruction to the nearest line before 472 the instruction (just like objdump -dl). So we use below strategy: 473 Instead of finding the exact line of the instruction in an address, we find the nearest 474 line to the instruction in an address. If an address doesn't have a line info, we find 475 the line info of address - 1. If still no line info, then use address - 2, address - 3, 476 etc. 477 478 The implementation steps are as below: 479 1. Collect all (dso_path, func_addr, addr) requests before converting. This saves the 480 times to call addr2line. 481 2. Convert addrs to (source_file, line) pairs for each dso_path as below: 482 2.1 Check if the dso_path has .debug_line. If not, omit its conversion. 483 2.2 Get arch of the dso_path, and decide the addr_step for it. addr_step is the step we 484 change addr each time. For example, since instructions of arm64 are all 4 bytes long, 485 addr_step for arm64 can be 4. 486 2.3 Use addr2line to find line info for each addr in the dso_path. 487 2.4 For each addr without line info, use addr2line to find line info for 488 range(addr - addr_step, addr - addr_step * 4 - 1, -addr_step). 
489 2.5 For each addr without line info, use addr2line to find line info for 490 range(addr - addr_step * 5, addr - addr_step * 128 - 1, -addr_step). 491 (128 is a guess number. A nested switch statement in 492 system/core/demangle/Demangler.cpp has >300 bytes without line info in arm64.) 493 """ 494 class Dso(object): 495 """ Info of a dynamic shared library. 496 addrs: a map from address to Addr object in this dso. 497 """ 498 499 def __init__(self, build_id: Optional[str]): 500 self.build_id = build_id 501 self.addrs: Dict[int, Addr2Nearestline.Addr] = {} 502 # Saving file names for each addr takes a lot of memory. So we store file ids in Addr, 503 # and provide data structures connecting file id and file name here. 504 self.file_name_to_id: Dict[str, int] = {} 505 self.file_id_to_name: List[str] = [] 506 self.func_name_to_id: Dict[str, int] = {} 507 self.func_id_to_name: List[str] = [] 508 509 def get_file_id(self, file_path: str) -> int: 510 file_id = self.file_name_to_id.get(file_path) 511 if file_id is None: 512 file_id = self.file_name_to_id[file_path] = len(self.file_id_to_name) 513 self.file_id_to_name.append(file_path) 514 return file_id 515 516 def get_func_id(self, func_name: str) -> int: 517 func_id = self.func_name_to_id.get(func_name) 518 if func_id is None: 519 func_id = self.func_name_to_id[func_name] = len(self.func_id_to_name) 520 self.func_id_to_name.append(func_name) 521 return func_id 522 523 class Addr(object): 524 """ Info of an addr request. 525 func_addr: start_addr of the function containing addr. 526 source_lines: a list of [file_id, line_number] for addr. 527 source_lines[:-1] are all for inlined functions. 
528 """ 529 530 def __init__(self, func_addr: int): 531 self.func_addr = func_addr 532 self.source_lines: Optional[List[int, int]] = None 533 534 def __init__( 535 self, ndk_path: Optional[str], 536 binary_finder: BinaryFinder, with_function_name: bool): 537 self.symbolizer_path = ToolFinder.find_tool_path('llvm-symbolizer', ndk_path) 538 if not self.symbolizer_path: 539 log_exit("Can't find llvm-symbolizer. " + NDK_ERROR_MESSAGE) 540 self.readelf = ReadElf(ndk_path) 541 self.dso_map: Dict[str, Addr2Nearestline.Dso] = {} # map from dso_path to Dso. 542 self.binary_finder = binary_finder 543 self.with_function_name = with_function_name 544 545 def add_addr(self, dso_path: str, build_id: Optional[str], func_addr: int, addr: int): 546 dso = self.dso_map.get(dso_path) 547 if dso is None: 548 dso = self.dso_map[dso_path] = self.Dso(build_id) 549 if addr not in dso.addrs: 550 dso.addrs[addr] = self.Addr(func_addr) 551 552 def convert_addrs_to_lines(self, jobs: int): 553 with ThreadPoolExecutor(jobs) as executor: 554 futures: List[Future] = [] 555 for dso_path, dso in self.dso_map.items(): 556 futures.append(executor.submit(self._convert_addrs_in_one_dso, dso_path, dso)) 557 for future in futures: 558 # Call future.result() to report exceptions raised in the executor. 559 future.result() 560 561 def _convert_addrs_in_one_dso(self, dso_path: str, dso: Addr2Nearestline.Dso): 562 real_path = self.binary_finder.find_binary(dso_path, dso.build_id) 563 if not real_path: 564 if dso_path not in ['//anon', 'unknown', '[kernel.kallsyms]']: 565 logging.debug("Can't find dso %s" % dso_path) 566 return 567 568 if not self._check_debug_line_section(real_path): 569 logging.debug("file %s doesn't contain .debug_line section." 
% real_path) 570 return 571 572 addr_step = self._get_addr_step(real_path) 573 self._collect_line_info(dso, real_path, [0]) 574 self._collect_line_info(dso, real_path, range(-addr_step, -addr_step * 4 - 1, -addr_step)) 575 self._collect_line_info(dso, real_path, 576 range(-addr_step * 5, -addr_step * 128 - 1, -addr_step)) 577 578 def _check_debug_line_section(self, real_path: Path) -> bool: 579 return '.debug_line' in self.readelf.get_sections(real_path) 580 581 def _get_addr_step(self, real_path: Path) -> int: 582 arch = self.readelf.get_arch(real_path) 583 if arch == 'arm64': 584 return 4 585 if arch == 'arm': 586 return 2 587 return 1 588 589 def _collect_line_info( 590 self, dso: Addr2Nearestline.Dso, real_path: Path, addr_shifts: List[int]): 591 """ Use addr2line to get line info in a dso, with given addr shifts. """ 592 # 1. Collect addrs to send to addr2line. 593 addr_set: Set[int] = set() 594 for addr in dso.addrs: 595 addr_obj = dso.addrs[addr] 596 if addr_obj.source_lines: # already has source line, no need to search. 597 continue 598 for shift in addr_shifts: 599 # The addr after shift shouldn't change to another function. 600 shifted_addr = max(addr + shift, addr_obj.func_addr) 601 addr_set.add(shifted_addr) 602 if shifted_addr == addr_obj.func_addr: 603 break 604 if not addr_set: 605 return 606 addr_request = '\n'.join(['0x%x' % addr for addr in sorted(addr_set)]) 607 608 # 2. Use addr2line to collect line info. 609 try: 610 subproc = subprocess.Popen(self._build_symbolizer_args(real_path), 611 stdin=subprocess.PIPE, stdout=subprocess.PIPE) 612 (stdoutdata, _) = subproc.communicate(str_to_bytes(addr_request)) 613 stdoutdata = bytes_to_str(stdoutdata) 614 except OSError: 615 return 616 addr_map = self.parse_line_output(stdoutdata, dso) 617 618 # 3. Fill line info in dso.addrs. 
619 for addr in dso.addrs: 620 addr_obj = dso.addrs[addr] 621 if addr_obj.source_lines: 622 continue 623 for shift in addr_shifts: 624 shifted_addr = max(addr + shift, addr_obj.func_addr) 625 lines = addr_map.get(shifted_addr) 626 if lines: 627 addr_obj.source_lines = lines 628 break 629 if shifted_addr == addr_obj.func_addr: 630 break 631 632 def _build_symbolizer_args(self, binary_path: Path) -> List[str]: 633 args = [self.symbolizer_path, '--print-address', '--inlining', '--obj=%s' % binary_path] 634 if self.with_function_name: 635 args += ['--functions=linkage', '--demangle'] 636 else: 637 args.append('--functions=none') 638 return args 639 640 def parse_line_output(self, output: str, dso: Addr2Nearestline.Dso) -> Dict[int, 641 List[Tuple[int]]]: 642 """ 643 The output is a list of lines. 644 address1 645 function_name1 (the function name can be empty) 646 source_location1 647 function_name2 648 source_location2 649 ... 650 (end with empty line) 651 """ 652 653 addr_map: Dict[int, List[Tuple[int]]] = {} 654 lines = output.strip().splitlines() 655 i = 0 656 while i < len(lines): 657 address = self._parse_line_output_address(lines[i]) 658 i += 1 659 if address is None: 660 continue 661 info = [] 662 while i < len(lines): 663 if self.with_function_name: 664 if i + 1 == len(lines): 665 break 666 function_name = lines[i].strip() 667 if not function_name and (':' not in lines[i+1]): 668 # no more frames 669 break 670 i += 1 671 elif not lines[i]: 672 i += 1 673 break 674 675 file_path, line_number = self._parse_line_output_source_location(lines[i]) 676 i += 1 677 if not file_path or not line_number: 678 # An addr can have a list of (file, line), when the addr belongs to an inlined 679 # function. Sometimes only part of the list has ? mark. In this case, we think 680 # the line info is valid if the first line doesn't have ? mark. 
681 if not info: 682 break 683 continue 684 file_id = dso.get_file_id(file_path) 685 if self.with_function_name: 686 func_id = dso.get_func_id(function_name) 687 info.append((file_id, line_number, func_id)) 688 else: 689 info.append((file_id, line_number)) 690 if info: 691 addr_map[address] = info 692 return addr_map 693 694 def _parse_line_output_address(self, output: str) -> Optional[int]: 695 if output.startswith('0x'): 696 return int(output, 16) 697 return None 698 699 def _parse_line_output_source_location(self, line: str) -> Tuple[Optional[str], Optional[int]]: 700 file_path, line_number = None, None 701 # Handle lines in format filename:line:column, like "runtest/two_functions.cpp:14:25". 702 # Filename may contain ':' like "C:\Users\...\file". 703 items = line.rsplit(':', 2) 704 if len(items) == 3: 705 file_path, line_number = items[:2] 706 if not file_path or ('?' in file_path) or not line_number or ('?' in line_number): 707 return None, None 708 try: 709 line_number = int(line_number) 710 except ValueError: 711 return None, None 712 return file_path, line_number 713 714 def get_dso(self, dso_path: str) -> Addr2Nearestline.Dso: 715 return self.dso_map.get(dso_path) 716 717 def get_addr_source(self, dso: Addr2Nearestline.Dso, addr: int) -> Optional[List[Tuple[int]]]: 718 source = dso.addrs[addr].source_lines 719 if source is None: 720 return None 721 if self.with_function_name: 722 return [(dso.file_id_to_name[file_id], line, dso.func_id_to_name[func_id]) 723 for (file_id, line, func_id) in source] 724 return [(dso.file_id_to_name[file_id], line) for (file_id, line) in source] 725 726 727class SourceFileSearcher(object): 728 """ Find source file paths in the file system. 729 The file paths reported by addr2line are the paths stored in debug sections 730 of shared libraries. And we need to convert them to file paths in the file 731 system. It is done in below steps: 732 1. Collect all file paths under the provided source_dirs. 
The suffix of a 733 source file should contain one of below: 734 h: for C/C++ header files. 735 c: for C/C++ source files. 736 java: for Java source files. 737 kt: for Kotlin source files. 738 2. Given an abstract_path reported by addr2line, select the best real path 739 as below: 740 2.1 Find all real paths with the same file name as the abstract path. 741 2.2 Select the real path having the longest common suffix with the abstract path. 742 """ 743 744 SOURCE_FILE_EXTS = {'.h', '.hh', '.H', '.hxx', '.hpp', '.h++', 745 '.c', '.cc', '.C', '.cxx', '.cpp', '.c++', 746 '.java', '.kt'} 747 748 @classmethod 749 def is_source_filename(cls, filename: str) -> bool: 750 ext = os.path.splitext(filename)[1] 751 return ext in cls.SOURCE_FILE_EXTS 752 753 def __init__(self, source_dirs: List[str]): 754 # Map from filename to a list of reversed directory path containing filename. 755 self.filename_to_rparents: Dict[str, List[str]] = {} 756 self._collect_paths(source_dirs) 757 758 def _collect_paths(self, source_dirs: List[str]): 759 for source_dir in source_dirs: 760 for parent, _, file_names in os.walk(source_dir): 761 rparent = None 762 for file_name in file_names: 763 if self.is_source_filename(file_name): 764 rparents = self.filename_to_rparents.get(file_name) 765 if rparents is None: 766 rparents = self.filename_to_rparents[file_name] = [] 767 if rparent is None: 768 rparent = parent[::-1] 769 rparents.append(rparent) 770 771 def get_real_path(self, abstract_path: str) -> Optional[str]: 772 abstract_path = abstract_path.replace('/', os.sep) 773 abstract_parent, file_name = os.path.split(abstract_path) 774 abstract_rparent = abstract_parent[::-1] 775 real_rparents = self.filename_to_rparents.get(file_name) 776 if real_rparents is None: 777 return None 778 best_matched_rparent = None 779 best_common_length = -1 780 for real_rparent in real_rparents: 781 length = len(os.path.commonprefix((real_rparent, abstract_rparent))) 782 if length > best_common_length: 783 
best_common_length = length 784 best_matched_rparent = real_rparent 785 if best_matched_rparent is None: 786 return None 787 return os.path.join(best_matched_rparent[::-1], file_name) 788 789 790class Objdump(object): 791 """ A wrapper of objdump to disassemble code. """ 792 793 def __init__(self, ndk_path: Optional[str], binary_finder: BinaryFinder): 794 self.ndk_path = ndk_path 795 self.binary_finder = binary_finder 796 self.readelf = ReadElf(ndk_path) 797 self.objdump_paths: Dict[str, str] = {} 798 799 def get_dso_info(self, dso_path: str, expected_build_id: Optional[str] 800 ) -> Optional[Tuple[str, str]]: 801 real_path = self.binary_finder.find_binary(dso_path, expected_build_id) 802 if not real_path: 803 return None 804 arch = self.readelf.get_arch(real_path) 805 if arch == 'unknown': 806 return None 807 return (str(real_path), arch) 808 809 def disassemble_code(self, dso_info, start_addr, addr_len) -> List[Tuple[str, int]]: 810 """ Disassemble [start_addr, start_addr + addr_len] of dso_path. 811 Return a list of pair (disassemble_code_line, addr). 812 """ 813 real_path, arch = dso_info 814 objdump_path = self.objdump_paths.get(arch) 815 if not objdump_path: 816 objdump_path = ToolFinder.find_tool_path('llvm-objdump', self.ndk_path, arch) 817 if not objdump_path: 818 log_exit("Can't find llvm-objdump." + NDK_ERROR_MESSAGE) 819 self.objdump_paths[arch] = objdump_path 820 821 # 3. Run objdump. 
# NOTE(review): this chunk opens with the tail of a method whose `def` line lies
# before the visible source, so it cannot be turned into valid standalone code
# here. Its statements are kept verbatim below (as comments, indentation assumed
# to be method-body level) so nothing is lost; re-attach them to the enclosing
# method when the preceding part of the file is reformatted.
#
#         args = [objdump_path, '-dlC', '--no-show-raw-insn',
#                 '--start-address=0x%x' % start_addr,
#                 '--stop-address=0x%x' % (start_addr + addr_len),
#                 real_path]
#         if arch == 'arm' and 'llvm-objdump' in objdump_path:
#             args += ['--print-imm-hex']
#         try:
#             subproc = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
#             (stdoutdata, _) = subproc.communicate()
#             stdoutdata = bytes_to_str(stdoutdata)
#         except OSError:
#             return None
#
#         if not stdoutdata:
#             return None
#         result = []
#         for line in stdoutdata.split('\n'):
#             line = line.rstrip()  # Remove '\r' on Windows.
#             items = line.split(':', 1)
#             try:
#                 addr = int(items[0], 16)
#             except ValueError:
#                 addr = 0
#             result.append((line, addr))
#         return result


class ReadElf:
    """ A wrapper of llvm-readelf.

    Every query method first checks the ELF magic of the file and returns an
    "empty" answer ('unknown', '' or []) when the file is not an ELF file or
    when readelf exits with a non-zero status.
    """

    # (marker in `readelf -h` output, arch name). Order matters: the first
    # marker found in the output wins, exactly as in a chain of `if`s.
    _MACHINE_MARKERS = (
        ('AArch64', 'arm64'),
        ('ARM', 'arm'),
        ('X86-64', 'x86_64'),
        ('80386', 'x86'),
    )

    def __init__(self, ndk_path: Optional[str]):
        """ Locate llvm-readelf through ToolFinder; exit the program if it can't be found. """
        self.readelf_path = ToolFinder.find_tool_path('llvm-readelf', ndk_path)
        if not self.readelf_path:
            log_exit("Can't find llvm-readelf. " + NDK_ERROR_MESSAGE)

    @staticmethod
    def is_elf_file(path: Union[Path, str]) -> bool:
        """ Return True if path is a regular file starting with the 4-byte ELF magic. """
        if os.path.isfile(path):
            with open(path, 'rb') as fh:
                return fh.read(4) == b'\x7fELF'
        return False

    def _run_readelf(self, option: str, elf_file_path: Union[Path, str]) -> Optional[str]:
        """ Run `readelf <option> <file>` and return its decoded stdout.

        Returns None when the file is not an ELF file or readelf fails.
        """
        if not self.is_elf_file(elf_file_path):
            return None
        try:
            output = subprocess.check_output([self.readelf_path, option, str(elf_file_path)])
        except subprocess.CalledProcessError:
            return None
        return bytes_to_str(output)

    def get_arch(self, elf_file_path: Union[Path, str]) -> str:
        """ Get arch of an elf file: 'arm64', 'arm', 'x86_64', 'x86' or 'unknown'. """
        output = self._run_readelf('-h', elf_file_path)
        if output is not None:
            for marker, arch in self._MACHINE_MARKERS:
                if marker in output:
                    return arch
        return 'unknown'

    def get_build_id(self, elf_file_path: Union[Path, str], with_padding=True) -> str:
        """ Get build id of an elf file.

        Args:
            elf_file_path: path of the file to inspect.
            with_padding: if True, return the id in the form produced by pad_build_id().
        Returns: The build id, or "" if the file has none or can't be read by readelf.
        """
        output = self._run_readelf('-n', elf_file_path)
        if output is None:
            return ""
        match = re.search(r'Build ID:\s*(\S+)', output)
        if not match:
            return ""
        build_id = match.group(1)
        return self.pad_build_id(build_id) if with_padding else build_id

    @staticmethod
    def pad_build_id(build_id: str) -> str:
        """ Pad build id to 40 hex numbers (20 bytes).

        Shorter ids are right-padded with '0', longer ids are truncated, and the
        result is prefixed with '0x'.
        """
        return '0x' + build_id.ljust(40, '0')[:40]

    @staticmethod
    def unpad_build_id(build_id: str) -> str:
        """ Strip an optional '0x' prefix and trailing groups of eight '0' characters. """
        if build_id.startswith('0x'):
            build_id = build_id[2:]
        # Unpad build id as TrimZeroesFromBuildIDString() in quipper.
        padding = '0' * 8
        while build_id.endswith(padding):
            build_id = build_id[:-len(padding)]
        return build_id

    def get_sections(self, elf_file_path: Union[Path, str]) -> List[str]:
        """ Get sections of an elf file, as a list of section names in readelf order. """
        section_names: List[str] = []
        output = self._run_readelf('-SW', elf_file_path)
        if output is None:
            return section_names
        for line in output.split('\n'):
            # Parse line like:" [ 1] .note.android.ident NOTE  0000000000400190 ...".
            match = re.search(r'^\s+\[\s*\d+\]\s(.+?)\s', line)
            if not match:
                continue
            section_name = match.group(1).strip()
            if section_name:
                section_names.append(section_name)
        return section_names


def extant_dir(arg: str) -> str:
    """ArgumentParser type that only accepts extant directories.

    Args:
        arg: The string argument given on the command line.
    Returns: The argument as a realpath.
    Raises:
        argparse.ArgumentTypeError: The given path isn't a directory.
    """
    path = os.path.realpath(arg)
    if not os.path.isdir(path):
        raise argparse.ArgumentTypeError('{} is not a directory.'.format(path))
    return path


def extant_file(arg: str) -> str:
    """ArgumentParser type that only accepts extant files.

    Args:
        arg: The string argument given on the command line.
    Returns: The argument as a realpath.
    Raises:
        argparse.ArgumentTypeError: The given path isn't a file.
    """
    path = os.path.realpath(arg)
    if not os.path.isfile(path):
        raise argparse.ArgumentTypeError('{} is not a file.'.format(path))
    return path


def log_fatal(msg: str):
    """ Report an unrecoverable error by raising Exception(msg). """
    raise Exception(msg)


def log_exit(msg: str):
    """ Terminate the program through sys.exit(msg), i.e. raise SystemExit carrying msg. """
    sys.exit(msg)


class LogFormatter(logging.Formatter):
    """ Use custom logging format. """

    def __init__(self):
        super().__init__('%(asctime)s [%(levelname)s] (%(filename)s:%(lineno)d) %(message)s')

    def formatTime(self, record, datefmt=None):
        """ Format the record time as 'HH:MM:SS,mmm'.

        The datefmt argument is accepted for compatibility with
        logging.Formatter.formatTime() (hence the None default) but is ignored:
        the time format is fixed.
        """
        return super().formatTime(record, '%H:%M:%S') + ',%03d' % record.msecs


class Log:
    """ One-time setup of the root logger. """

    # Set to True by init(); checked by callers to avoid initializing twice.
    initialized = False

    @classmethod
    def init(cls, log_level: str = 'info'):
        """ Configure logging.root with the given level and a LogFormatter stream handler.

        Args:
            log_level: level name in any case, e.g. 'debug', 'info', 'warning'.
        Must be called at most once (asserted).
        """
        assert not cls.initialized
        cls.initialized = True
        cls.logger = logging.root
        cls.logger.setLevel(log_level.upper())
        handler = logging.StreamHandler()
        handler.setFormatter(LogFormatter())
        cls.logger.addHandler(handler)


class ArgParseFormatter(
        argparse.ArgumentDefaultsHelpFormatter, argparse.RawDescriptionHelpFormatter):
    """ Help formatter combining default-value display with raw description text. """


@dataclass
class ReportLibOptions:
    """ Report options collected by BaseArgumentParser.parse_known_args(). """
    show_art_frames: bool
    # The list/str options below have no argparse default, so they are None
    # when the corresponding command line option is omitted.
    trace_offcpu: Optional[str]
    proguard_mapping_files: Optional[List[str]]
    sample_filters: List[str]
    aggregate_threads: Optional[List[str]]


class BaseArgumentParser(argparse.ArgumentParser):
    """ ArgumentParser with shared options: --log, report lib options and sample filters. """

    def __init__(self, *args, **kwargs):
        # Use ArgParseFormatter unless the caller explicitly picks a formatter.
        # (Passing it unconditionally made a caller-supplied formatter_class a
        # duplicate keyword argument, i.e. a TypeError.)
        kwargs.setdefault('formatter_class', ArgParseFormatter)
        super().__init__(*args, **kwargs)
        self.has_sample_filter_options = False
        self.sample_filter_with_pid_shortcut = False
        self.has_report_lib_options = False
        # Whether parse_known_args() has already registered the --log option.
        self._log_option_added = False

    def add_report_lib_options(self, group: Optional[Any] = None,
                               default_show_art_frames: bool = False,
                               sample_filter_group: Optional[Any] = None,
                               sample_filter_with_pid_shortcut: bool = True):
        """ Add the options that end up in namespace.report_lib_options.

        Args:
            group: argument group receiving the options; the parser itself if not set.
            default_show_art_frames: default value of --show-art-frames.
            sample_filter_group: argument group receiving the sample filter options;
                a new 'Sample filter options' group is created if not set.
            sample_filter_with_pid_shortcut: also add the --pid/--tid shortcuts.
        """
        self.has_report_lib_options = True
        parser = group if group else self
        parser.add_argument(
            '--proguard-mapping-file', nargs='+',
            help='Add proguard mapping file to de-obfuscate symbols')
        parser.add_argument('--show-art-frames', '--show_art_frames',
                            action=argparse.BooleanOptionalAction, default=default_show_art_frames,
                            help='Show frames of internal methods in the ART Java interpreter.')
        parser.add_argument(
            '--trace-offcpu', choices=['on-cpu', 'off-cpu', 'on-off-cpu', 'mixed-on-off-cpu'],
            help="""Set report mode for profiles recorded with --trace-offcpu option. All possible
                    modes are: on-cpu (only on-cpu samples), off-cpu (only off-cpu samples),
                    on-off-cpu (both on-cpu and off-cpu samples, can be split by event name),
                    mixed-on-off-cpu (on-cpu and off-cpu samples using the same event name).
                    If not set, mixed-on-off-cpu mode is used.
                """)
        self._add_sample_filter_options(sample_filter_group, sample_filter_with_pid_shortcut)
        parser.add_argument(
            '--aggregate-threads', nargs='+', metavar='thread_name_regex',
            help="""Aggregate threads with names matching the same regex. As a result, samples from
                    different threads (like a thread pool) can be shown in one flamegraph.
                """)

    def _add_sample_filter_options(
            self, group: Optional[Any] = None, with_pid_shortcut: bool = True):
        """ Add the include/exclude sample filter options.

        Args:
            group: argument group receiving the options; a new 'Sample filter options'
                group is created if not set.
            with_pid_shortcut: also add --pid and --tid as shortcuts for
                --include-pid and --include-tid.
        """
        if not group:
            group = self.add_argument_group('Sample filter options')
        group.add_argument('--exclude-pid', metavar='pid', nargs='+', type=int,
                           help='exclude samples for selected processes')
        group.add_argument('--exclude-tid', metavar='tid', nargs='+', type=int,
                           help='exclude samples for selected threads')
        group.add_argument(
            '--exclude-process-name', metavar='process_name_regex', nargs='+',
            help='exclude samples for processes with name containing the regular expression')
        group.add_argument(
            '--exclude-thread-name', metavar='thread_name_regex', nargs='+',
            help='exclude samples for threads with name containing the regular expression')

        if with_pid_shortcut:
            group.add_argument('--pid', metavar='pid', nargs='+', type=int,
                               help='only include samples for selected processes')
            group.add_argument('--tid', metavar='tid', nargs='+', type=int,
                               help='only include samples for selected threads')
        group.add_argument('--include-pid', metavar='pid', nargs='+', type=int,
                           help='only include samples for selected processes')
        group.add_argument('--include-tid', metavar='tid', nargs='+', type=int,
                           help='only include samples for selected threads')
        group.add_argument(
            '--include-process-name', metavar='process_name_regex', nargs='+',
            help='only include samples for processes with name containing the regular expression')
        group.add_argument(
            '--comm', '--include-thread-name', metavar='thread_name_regex',
            dest='include_thread_name', nargs='+',
            help='only include samples for threads with name containing the regular expression')
        group.add_argument(
            '--filter-file', metavar='file',
            help='use filter file to filter samples based on timestamps. ' +
            'The file format is in doc/sampler_filter.md.')
        self.has_sample_filter_options = True
        self.sample_filter_with_pid_shortcut = with_pid_shortcut

    def _build_sample_filter(self, args: argparse.Namespace) -> List[str]:
        """ Build sample filters, which can be passed to ReportLib.SetSampleFilter().

        Id lists become one '<option> <comma separated ids>' pair; name regexes
        become one '<option> <regex>' pair per regex. Options that were not given
        on the command line contribute nothing.
        """
        filters: List[str] = []

        def add_ids(option: str, ids: Optional[List[int]]):
            if ids:
                filters.extend([option, ','.join(str(value) for value in ids)])

        def add_names(option: str, names: Optional[List[str]]):
            for name in names or []:
                filters.extend([option, name])

        add_ids('--exclude-pid', args.exclude_pid)
        add_ids('--exclude-tid', args.exclude_tid)
        add_names('--exclude-process-name', args.exclude_process_name)
        add_names('--exclude-thread-name', args.exclude_thread_name)

        add_ids('--include-pid', args.include_pid)
        add_ids('--include-tid', args.include_tid)
        # --pid/--tid only exist on the namespace when the shortcuts were added.
        if self.sample_filter_with_pid_shortcut:
            add_ids('--include-pid', args.pid)
            add_ids('--include-tid', args.tid)
        add_names('--include-process-name', args.include_process_name)
        add_names('--include-thread-name', args.include_thread_name)
        if args.filter_file:
            filters.extend(['--filter-file', args.filter_file])
        return filters

    def parse_known_args(self, *args, **kwargs):
        """ Parse arguments, then apply the shared post-processing.

        The --log option is registered on the first call. After parsing, if
        add_report_lib_options() was used, a ReportLibOptions instance is stored
        in namespace.report_lib_options, and Log is initialized with the chosen
        log level unless it already was.

        Returns: (namespace, remaining args), as argparse does.
        """
        # Register --log only once: adding it on every call made a second
        # parse_args()/parse_known_args() on the same parser fail with a
        # "conflicting option string" ArgumentError.
        if not self._log_option_added:
            self.add_argument(
                '--log', choices=['debug', 'info', 'warning'],
                default='info', help='set log level')
            self._log_option_added = True
        namespace, left_args = super().parse_known_args(*args, **kwargs)

        if self.has_report_lib_options:
            sample_filters = self._build_sample_filter(namespace)
            report_lib_options = ReportLibOptions(
                namespace.show_art_frames, namespace.trace_offcpu, namespace.proguard_mapping_file,
                sample_filters, namespace.aggregate_threads)
            setattr(namespace, 'report_lib_options', report_lib_options)

        if not Log.initialized:
            Log.init(namespace.log)
        return namespace, left_args