#!/usr/bin/env python3
#
# Copyright (C) 2016 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""binary_cache_builder.py: read perf.data, collect binaries needed by
    it, and put them in binary_cache.
"""

from collections import defaultdict
import logging
import os
import os.path
from pathlib import Path
import shutil
import sys
from typing import Dict, List, Optional, Tuple, Union

from simpleperf_report_lib import ReportLib
from simpleperf_utils import (
    AdbHelper, BaseArgumentParser, extant_dir, extant_file, flatten_arg_list,
    ReadElf, str_to_bytes)


def is_jit_symfile(dso_name: str) -> bool:
    """ Return True if the last '/'-separated component of dso_name starts with
        'TemporaryFile'. Such names are treated as JIT symfiles and are skipped
        when collecting binaries.
    """
    return dso_name.split('/')[-1].startswith('TemporaryFile')


class BinaryCache:
    """ Maps binary paths recorded in perf.data to paths under binary_dir. """

    def __init__(self, binary_dir: Path):
        self.binary_dir = binary_dir

    def get_path_in_cache(self, device_path: str, build_id: str) -> Path:
        """ Given a binary path in perf.data, return its corresponding path in the cache.

            device_path: '/'-separated path of the binary as recorded in perf.data.
            build_id: build id of the binary, or a falsy value if unknown.
        """
        if build_id:
            filename = device_path.split('/')[-1]
            # Add build id to make the filename unique.
            # NOTE(review): the first two characters of build_id are dropped,
            # presumably a '0x' prefix - confirm against the build id producer.
            unique_filename = build_id[2:] + '-' + filename
            return self.binary_dir / unique_filename

        # For elf file without build id, we can only follow its path on device. Otherwise,
        # simpleperf can't find it. However, we don't prefer this way. Because:
        # 1) It doesn't work for native libs loaded directly from apk
        #    (android:extractNativeLibs="false").
        # 2) It may exceed path limit on windows.
        if device_path.startswith('/'):
            device_path = device_path[1:]
        device_path = device_path.replace('/', os.sep)
        return Path(os.path.join(self.binary_dir, device_path))


class BinarySource:
    """ Source to find debug binaries. """

    def __init__(self, readelf: ReadElf):
        self.readelf = readelf

    def collect_binaries(self, binaries: Dict[str, str], binary_cache: BinaryCache):
        """ pull binaries needed in perf.data to binary_cache.
            binaries: maps from binary path to its build_id in perf.data.

            Subclasses must override this method; the base implementation always raises.
        """
        # NotImplementedError is a subclass of Exception, so callers catching
        # Exception keep working.
        raise NotImplementedError('not implemented')

    def read_build_id(self, path: Path):
        """ Return whatever self.readelf.get_build_id() reports for the file at path. """
        return self.readelf.get_build_id(path)


class BinarySourceFromDevice(BinarySource):
    """ Pull binaries from device. """

    def __init__(self, readelf: ReadElf, disable_adb_root: bool):
        super().__init__(readelf)
        self.adb = AdbHelper(enable_switch_to_root=not disable_adb_root)

    def collect_binaries(self, binaries: Dict[str, str], binary_cache: BinaryCache):
        """ Pull every binary in binaries, then kernel symbols, into binary_cache.
            Does nothing if no device is available.
        """
        if not self.adb.is_device_available():
            return
        for path, build_id in binaries.items():
            self.collect_binary(path, build_id, binary_cache)
        self.pull_kernel_symbols(binary_cache.binary_dir / 'kallsyms')

    def collect_binary(self, path: str, build_id: str, binary_cache: BinaryCache):
        """ Pull one binary into the cache, skipping paths that can't be pulled. """
        if not path.startswith('/') or path == "//anon" or path.startswith("/dev/"):
            # [kernel.kallsyms] or unknown, or something we can't find binary.
            return
        binary_cache_file = binary_cache.get_path_in_cache(path, build_id)
        self.check_and_pull_binary(path, build_id, binary_cache_file)

    def check_and_pull_binary(self, path: str, expected_build_id: str, binary_cache_file: Path):
        """If the binary_cache_file exists and has the expected_build_id, there
           is no need to pull the binary from device. Otherwise, pull it.
        """
        if binary_cache_file.is_file() and (
            not expected_build_id or expected_build_id == self.read_build_id(binary_cache_file)
        ):
            logging.info('use current file in binary_cache: %s', binary_cache_file)
            return

        logging.info('pull file to binary_cache: %s to %s', path, binary_cache_file)
        binary_cache_file.parent.mkdir(parents=True, exist_ok=True)
        # Remove a stale file (wrong build id) before pulling the new one.
        if binary_cache_file.is_file():
            binary_cache_file.unlink()
        self.pull_file_from_device(path, binary_cache_file)

    def pull_file_from_device(self, device_path: str, host_path: Path) -> bool:
        """ Pull device_path to host_path via adb.

            Returns True on success. On failure, logs a warning and returns False.
        """
        host_path_str = str(host_path)
        if self.adb.run(['pull', device_path, host_path_str]):
            return True
        # On non-root devices, we can't pull /data/app/XXX/base.odex directly.
        # Instead, we can first copy the file to /data/local/tmp, then pull it.
        filename = device_path[device_path.rfind('/') + 1:]
        tmp_path = '/data/local/tmp/' + filename
        # If the source already is tmp_path, the fallback can't help, and removing
        # tmp_path afterwards would delete the source file itself.
        if tmp_path != device_path and self.adb.run(
                ['shell', 'cp', device_path, '/data/local/tmp']):
            pulled = self.adb.run(['pull', tmp_path, host_path_str])
            # Always remove the temporary copy, even when the pull failed, so
            # nothing is left behind on the device.
            self.adb.run(['shell', 'rm', tmp_path])
            if pulled:
                return True
        logging.warning('failed to pull %s from device', device_path)
        return False

    def pull_kernel_symbols(self, file_path: Path):
        """ Replace file_path with /proc/kallsyms pulled from the device.

            Any existing file at file_path is removed first. The pull is only
            attempted when adb can switch to root.
        """
        if file_path.is_file():
            file_path.unlink()
        if self.adb.switch_to_root():
            self.adb.run(['shell', 'echo', '0', '>/proc/sys/kernel/kptr_restrict'])
            self.adb.run(['pull', '/proc/kallsyms', str(file_path)])


class BinarySourceFromLibDirs(BinarySource):
    """ Collect binaries from lib dirs. """

    def __init__(self, readelf: ReadElf, lib_dirs: List[Path]):
        super().__init__(readelf)
        self.lib_dirs = lib_dirs
        # The maps and the cache are (re)built on each collect_binaries() call.
        self.filename_map: Optional[Dict[str, List[Tuple[str, str]]]] = None
        self.build_id_map: Optional[Dict[str, str]] = None
        self.binary_cache: Optional[BinaryCache] = None

    def collect_binaries(self, binaries: Dict[str, str], binary_cache: BinaryCache):
        """ Copy files in lib_dirs that match entries in binaries to binary_cache.
            binaries: maps from binary path to its build_id in perf.data.
        """
        self.create_filename_map(binaries)
        self.create_build_id_map(binaries)
        self.binary_cache = binary_cache

        # Search all files in lib_dirs, and copy matching files to build_cache.
        for lib_dir in self.lib_dirs:
            if self.is_platform_symbols_dir(lib_dir):
                self.search_platform_symbols_dir(lib_dir)
            else:
                self.search_dir(lib_dir)

    def create_filename_map(self, binaries: Dict[str, str]):
        """ Create a map mapping from filename to binaries having the name. """
        self.filename_map = defaultdict(list)
        for path, build_id in binaries.items():
            filename = path[path.rfind('/') + 1:]
            self.filename_map[filename].append((path, build_id))

    def create_build_id_map(self, binaries: Dict[str, str]):
        """ Create a map mapping from build id to binary path.
            Binaries without a build id are left out.
        """
        self.build_id_map = {
            build_id: path for path, build_id in binaries.items() if build_id}

    def is_platform_symbols_dir(self, lib_dir: Path):
        """ Check if lib_dir points to $ANDROID_PRODUCT_OUT/symbols. """
        subdir_names = [p.name for p in lib_dir.iterdir()]
        return lib_dir.name == 'symbols' and 'system' in subdir_names

    def search_platform_symbols_dir(self, lib_dir: Path):
        """ Platform symbols dir contains too many binaries. Reading build ids for
            all of them takes a long time. So we only read build ids for binaries
            having names exist in filename_map.
        """
        for root, _, files in os.walk(lib_dir):
            for filename in files:
                binaries = self.filename_map.get(filename)
                if not binaries:
                    continue
                file_path = Path(os.path.join(root, filename))
                build_id = self.read_build_id(file_path)
                for path, expected_build_id in binaries:
                    if expected_build_id == build_id:
                        self.copy_to_binary_cache(file_path, build_id, path)

    def search_dir(self, lib_dir: Path):
        """ For a normal lib dir, it's unlikely to contain many binaries. So we can read
            build ids for all binaries in it. But users may give debug binaries with a name
            different from the one recorded in perf.data. So we should only rely on build id
            if it is available.
        """
        for root, _, files in os.walk(lib_dir):
            for filename in files:
                file_path = Path(os.path.join(root, filename))
                build_id = self.read_build_id(file_path)
                if build_id:
                    # For elf file with build id, use build id to match.
                    device_path = self.build_id_map.get(build_id)
                    if device_path:
                        self.copy_to_binary_cache(file_path, build_id, device_path)
                elif self.readelf.is_elf_file(file_path):
                    # For elf file without build id, use filename to match.
                    # Only the first recorded binary without a build id is used.
                    for path, expected_build_id in self.filename_map.get(filename, []):
                        if not expected_build_id:
                            self.copy_to_binary_cache(file_path, '', path)
                            break

    def copy_to_binary_cache(
            self, from_path: Path, expected_build_id: str, device_path: str):
        """ Copy from_path to the cache location of device_path, unless the file
            already in the cache is at least as good (see need_to_copy).
        """
        to_path = self.binary_cache.get_path_in_cache(device_path, expected_build_id)
        if not self.need_to_copy(from_path, to_path, expected_build_id):
            # The existing file in binary_cache can provide more information, so no need to copy.
            return
        to_path.parent.mkdir(parents=True, exist_ok=True)
        logging.info('copy to binary_cache: %s to %s', from_path, to_path)
        shutil.copy(from_path, to_path)

    def need_to_copy(self, from_path: Path, to_path: Path, expected_build_id: str):
        """ Return True if to_path is missing, has a different build id than
            expected_build_id, or is more stripped than from_path.
        """
        if not to_path.is_file() or self.read_build_id(to_path) != expected_build_id:
            return True
        return self.get_file_stripped_level(from_path) < self.get_file_stripped_level(to_path)

    def get_file_stripped_level(self, path: Path) -> int:
        """Return stripped level of an ELF file. Larger value means more stripped."""
        sections = self.readelf.get_sections(path)
        if '.debug_line' in sections:
            return 0
        if '.symtab' in sections:
            return 1
        return 2


class BinaryCacheBuilder:
    """Collect all binaries needed by perf.data in binary_cache."""

    def __init__(self, ndk_path: Optional[str], disable_adb_root: bool):
        self.readelf = ReadElf(ndk_path)
        self.device_source = BinarySourceFromDevice(self.readelf, disable_adb_root)
        self.binary_cache_dir = Path('binary_cache')
        self.binary_cache = BinaryCache(self.binary_cache_dir)
        # Maps binary name to build id; filled by collect_used_binaries().
        self.binaries: Dict[str, str] = {}

    def build_binary_cache(self, perf_data_path: str, symfs_dirs: List[Union[Path, str]]) -> bool:
        """ Build the binary cache for perf_data_path.

            Binaries are first copied from symfs_dirs, then pulled from device, and
            finally a build_id_list file is written. Returns False if one of
            symfs_dirs isn't a directory, True otherwise.
        """
        self.binary_cache_dir.mkdir(exist_ok=True)
        self.collect_used_binaries(perf_data_path)
        if not self.copy_binaries_from_symfs_dirs(symfs_dirs):
            return False
        self.pull_binaries_from_device()
        self.create_build_id_list()
        return True

    def collect_used_binaries(self, perf_data_path):
        """read perf.data, collect all used binaries and their build id(if available)."""
        # A dict mapping from binary name to build_id
        binaries = {}
        # dso names already processed that never become keys in `binaries`
        # (JIT symfiles are skipped, '[kernel.kallsyms]' is stored as 'vmlinux').
        # Remembering them avoids repeating the same work for every frame.
        handled = set()
        lib = ReportLib()
        try:
            lib.SetRecordFile(perf_data_path)
            lib.SetLogSeverity('error')
            while lib.GetNextSample() is not None:
                symbols = [lib.GetSymbolOfCurrentSample()]
                callchain = lib.GetCallChainOfCurrentSample()
                for i in range(callchain.nr):
                    symbols.append(callchain.entries[i].symbol)

                for symbol in symbols:
                    dso_name = symbol.dso_name
                    if dso_name in binaries or dso_name in handled:
                        continue
                    if is_jit_symfile(dso_name):
                        handled.add(dso_name)
                        continue
                    if dso_name == '[kernel.kallsyms]':
                        handled.add(dso_name)
                        name = 'vmlinux'
                    else:
                        name = dso_name
                    binaries[name] = lib.GetBuildIdForPath(dso_name)
        finally:
            # Release the report lib even if reading the record file fails.
            lib.Close()
        self.binaries = binaries

    def copy_binaries_from_symfs_dirs(self, symfs_dirs: List[Union[str, Path]]) -> bool:
        """ Copy matching binaries from symfs_dirs into the cache.

            Returns False (after logging an error) if any entry isn't an existing
            directory; nothing is copied in that case. Returns True otherwise,
            including when symfs_dirs is empty.
        """
        if symfs_dirs:
            lib_dirs: List[Path] = []
            for symfs_dir in symfs_dirs:
                if isinstance(symfs_dir, str):
                    symfs_dir = Path(symfs_dir)
                if not symfs_dir.is_dir():
                    logging.error("can't find dir %s", symfs_dir)
                    return False
                lib_dirs.append(symfs_dir)
            lib_dir_source = BinarySourceFromLibDirs(self.readelf, lib_dirs)
            lib_dir_source.collect_binaries(self.binaries, self.binary_cache)
        return True

    def pull_binaries_from_device(self):
        """ Pull binaries recorded in self.binaries from the device into the cache. """
        self.device_source.collect_binaries(self.binaries, self.binary_cache)

    def create_build_id_list(self):
        """ Create build_id_list. So report scripts can find a binary by its build_id instead of
            path.
        """
        build_id_list_path = self.binary_cache_dir / 'build_id_list'
        # Write in binary mode to avoid "\r\n" problem on windows, which can confuse simpleperf.
        with open(build_id_list_path, 'wb') as fh:
            for root, _, files in os.walk(self.binary_cache_dir):
                for filename in files:
                    path = Path(os.path.join(root, filename))
                    build_id = self.readelf.get_build_id(path)
                    if build_id:
                        relative_path = path.relative_to(self.binary_cache_dir)
                        line = f'{build_id}={relative_path}\n'
                        fh.write(str_to_bytes(line))

    def find_path_in_cache(self, device_path: str) -> Optional[Path]:
        """ Return the cache path for device_path, using the build id recorded in
            self.binaries if there is one. The file is not checked for existence.
        """
        build_id = self.binaries.get(device_path)
        return self.binary_cache.get_path_in_cache(device_path, build_id)


def main() -> bool:
    """ Parse command line arguments and build the binary cache.
        Returns the result of BinaryCacheBuilder.build_binary_cache().
    """
    parser = BaseArgumentParser(description="""
        Pull binaries needed by perf.data from device to binary_cache directory.""")
    parser.add_argument('-i', '--perf_data_path', default='perf.data', type=extant_file, help="""
        The path of profiling data.""")
    parser.add_argument('-lib', '--native_lib_dir', type=extant_dir, nargs='+', help="""
        Path to find debug version of native shared libraries used in the app.""", action='append')
    parser.add_argument('--disable_adb_root', action='store_true', help="""
        Force adb to run in non root mode.""")
    parser.add_argument('--ndk_path', nargs=1, help='Find tools in the ndk path.')
    args = parser.parse_args()
    ndk_path = None if not args.ndk_path else args.ndk_path[0]
    builder = BinaryCacheBuilder(ndk_path, args.disable_adb_root)
    symfs_dirs = flatten_arg_list(args.native_lib_dir)
    return builder.build_binary_cache(args.perf_data_path, symfs_dirs)


if __name__ == '__main__':
    sys.exit(0 if main() else 1)