#!/usr/bin/env python3
#
# Copyright (C) 2016 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""binary_cache_builder.py: read perf.data, collect binaries needed by
    it, and put them in binary_cache.
"""

from collections import defaultdict
import logging
import os
import os.path
from pathlib import Path
import shutil
import sys
from typing import Dict, List, Optional, Tuple, Union

from simpleperf_report_lib import ReportLib
from simpleperf_utils import (
    AdbHelper, BaseArgumentParser, extant_dir, extant_file, flatten_arg_list,
    ReadElf, str_to_bytes)


def is_jit_symfile(dso_name: str) -> bool:
    """ Return True if the last '/'-separated component of dso_name starts with
        'TemporaryFile'. Such entries are skipped when collecting binaries.
    """
    return dso_name.split('/')[-1].startswith('TemporaryFile')


class BinaryCache:
    """ Maps binary paths recorded in perf.data to file paths under binary_dir. """

    def __init__(self, binary_dir: Path):
        self.binary_dir = binary_dir

    def get_path_in_cache(self, device_path: str, build_id: str) -> Path:
        """ Given a binary path in perf.data, return its corresponding path in the cache.

            With a non-empty build_id, the result is
            binary_dir/<build_id without its first two characters>-<filename>.
            Otherwise the result mirrors device_path below binary_dir.
        """
        if build_id:
            filename = device_path.split('/')[-1]
            # Add build id to make the filename unique.
            # NOTE(review): [2:] presumably drops a leading "0x" - confirm against ReadElf.
            unique_filename = build_id[2:] + '-' + filename
            return self.binary_dir / unique_filename

        # For elf file without build id, we can only follow its path on device. Otherwise,
        # simpleperf can't find it. However, we don't prefer this way. Because:
        # 1) It doesn't work for native libs loaded directly from apk
        #    (android:extractNativeLibs="false").
        # 2) It may exceed path limit on windows.
        #
        # Strip every leading '/', not just one: a path such as '//anon' would otherwise
        # stay absolute, and joining an absolute path discards binary_dir entirely.
        relative_path = device_path.lstrip('/').replace('/', os.sep)
        return Path(os.path.join(self.binary_dir, relative_path))


class BinarySource:
    """ Source to find debug binaries. """

    def __init__(self, readelf: ReadElf):
        self.readelf = readelf

    def collect_binaries(self, binaries: Dict[str, str], binary_cache: BinaryCache):
        """ pull binaries needed in perf.data to binary_cache.
            binaries: maps from binary path to its build_id in perf.data.

            Subclasses must override this method.
        """
        # NotImplementedError is still an Exception, so existing handlers keep working.
        raise NotImplementedError('not implemented')

    def read_build_id(self, path: Path):
        """ Return the build id that readelf reports for the file at path. """
        return self.readelf.get_build_id(path)


class BinarySourceFromDevice(BinarySource):
    """ Pull binaries from device. """

    def __init__(self, readelf: ReadElf, disable_adb_root: bool):
        super().__init__(readelf)
        self.adb = AdbHelper(enable_switch_to_root=not disable_adb_root)

    def collect_binaries(self, binaries: Dict[str, str], binary_cache: BinaryCache):
        """ Pull every binary in binaries, then the kernel symbols, into binary_cache.
            Does nothing when no device is available.
        """
        if not self.adb.is_device_available():
            return
        for path, build_id in binaries.items():
            self.collect_binary(path, build_id, binary_cache)
        self.pull_kernel_symbols(binary_cache.binary_dir / 'kallsyms')

    def collect_binary(self, path: str, build_id: str, binary_cache: BinaryCache):
        """ Pull one binary into binary_cache, unless its path can't name a device file. """
        if not path.startswith('/') or path == "//anon" or path.startswith("/dev/"):
            # [kernel.kallsyms] or unknown, or something we can't find binary.
            return
        binary_cache_file = binary_cache.get_path_in_cache(path, build_id)
        self.check_and_pull_binary(path, build_id, binary_cache_file)

    def check_and_pull_binary(self, path: str, expected_build_id: str, binary_cache_file: Path):
        """If the binary_cache_file exists and has the expected_build_id, there
           is no need to pull the binary from device. Otherwise, pull it.
        """
        if binary_cache_file.is_file() and (
            not expected_build_id or expected_build_id == self.read_build_id(binary_cache_file)
        ):
            logging.info('use current file in binary_cache: %s', binary_cache_file)
            return

        logging.info('pull file to binary_cache: %s to %s', path, binary_cache_file)
        binary_cache_file.parent.mkdir(parents=True, exist_ok=True)
        # Remove a stale file (one with an unexpected build id) before pulling.
        if binary_cache_file.is_file():
            binary_cache_file.unlink()
        self.pull_file_from_device(path, binary_cache_file)

    def pull_file_from_device(self, device_path: str, host_path: Path) -> bool:
        """ Pull device_path to host_path. Return True on success, False otherwise. """
        if self.adb.run(['pull', device_path, str(host_path)]):
            return True
        # On non-root devices, we can't pull /data/app/XXX/base.odex directly.
        # Instead, we can first copy the file to /data/local/tmp, then pull it.
        filename = device_path[device_path.rfind('/')+1:]
        tmp_path = '/data/local/tmp/' + filename
        if self.adb.run(['shell', 'cp', device_path, '/data/local/tmp']):
            pulled = self.adb.run(['pull', tmp_path, str(host_path)])
            # Remove the temporary copy whether or not the pull worked, so a failed
            # pull doesn't leave files behind on the device.
            self.adb.run(['shell', 'rm', tmp_path])
            if pulled:
                return True
        logging.warning('failed to pull %s from device', device_path)
        return False

    def pull_kernel_symbols(self, file_path: Path):
        """ Replace file_path with /proc/kallsyms pulled from the device. The pull is
            only attempted when adb can switch to root.
        """
        if file_path.is_file():
            file_path.unlink()
        if self.adb.switch_to_root():
            self.adb.run(['shell', 'echo', '0', '>/proc/sys/kernel/kptr_restrict'])
            self.adb.run(['pull', '/proc/kallsyms', str(file_path)])


class BinarySourceFromLibDirs(BinarySource):
    """ Collect binaries from lib dirs. """

    def __init__(self, readelf: ReadElf, lib_dirs: List[Path]):
        super().__init__(readelf)
        self.lib_dirs = lib_dirs
        # The three attributes below are filled in by collect_binaries().
        self.filename_map = None
        self.build_id_map = None
        self.binary_cache = None

    def collect_binaries(self, binaries: Dict[str, str], binary_cache: BinaryCache):
        """ Copy files in lib_dirs that match entries in binaries to binary_cache. """
        self.create_filename_map(binaries)
        self.create_build_id_map(binaries)
        self.binary_cache = binary_cache

        # Search all files in lib_dirs, and copy matching files to build_cache.
        for lib_dir in self.lib_dirs:
            if self.is_platform_symbols_dir(lib_dir):
                self.search_platform_symbols_dir(lib_dir)
            else:
                self.search_dir(lib_dir)

    def create_filename_map(self, binaries: Dict[str, str]):
        """ Create a map mapping from filename to binaries having the name. """
        self.filename_map = defaultdict(list)
        for path, build_id in binaries.items():
            filename = path[path.rfind('/') + 1:]
            self.filename_map[filename].append((path, build_id))

    def create_build_id_map(self, binaries: Dict[str, str]):
        """ Create a map mapping from build id to binary path. """
        self.build_id_map = {
            build_id: path for path, build_id in binaries.items() if build_id}

    def is_platform_symbols_dir(self, lib_dir: Path) -> bool:
        """ Check if lib_dir points to $ANDROID_PRODUCT_OUT/symbols. """
        # Compare the name first so the directory is only listed when needed.
        return lib_dir.name == 'symbols' and any(
            entry.name == 'system' for entry in lib_dir.iterdir())

    def search_platform_symbols_dir(self, lib_dir: Path):
        """ Platform symbols dir contains too many binaries. Reading build ids for
            all of them takes a long time. So we only read build ids for binaries
            having names exist in filename_map.
        """
        for root, _, files in os.walk(lib_dir):
            for filename in files:
                binaries = self.filename_map.get(filename)
                if not binaries:
                    continue
                file_path = Path(os.path.join(root, filename))
                build_id = self.read_build_id(file_path)
                for path, expected_build_id in binaries:
                    if expected_build_id == build_id:
                        self.copy_to_binary_cache(file_path, build_id, path)

    def search_dir(self, lib_dir: Path):
        """ For a normal lib dir, it's unlikely to contain many binaries. So we can read
            build ids for all binaries in it. But users may give debug binaries with a name
            different from the one recorded in perf.data. So we should only rely on build id
            if it is available.
        """
        for root, _, files in os.walk(lib_dir):
            for filename in files:
                file_path = Path(os.path.join(root, filename))
                build_id = self.read_build_id(file_path)
                if build_id:
                    # For elf file with build id, use build id to match.
                    device_path = self.build_id_map.get(build_id)
                    if device_path:
                        self.copy_to_binary_cache(file_path, build_id, device_path)
                elif self.readelf.is_elf_file(file_path):
                    # For elf file without build id, use filename to match.
                    for path, expected_build_id in self.filename_map.get(filename, []):
                        if not expected_build_id:
                            self.copy_to_binary_cache(file_path, '', path)
                            break

    def copy_to_binary_cache(
            self, from_path: Path, expected_build_id: str, device_path: str):
        """ Copy from_path to the cache location of device_path, unless need_to_copy()
            says the file already in the cache should be kept.
        """
        to_path = self.binary_cache.get_path_in_cache(device_path, expected_build_id)
        if not self.need_to_copy(from_path, to_path, expected_build_id):
            # The existing file in binary_cache can provide more information, so no need to copy.
            return
        to_path.parent.mkdir(parents=True, exist_ok=True)
        logging.info('copy to binary_cache: %s to %s', from_path, to_path)
        shutil.copy(from_path, to_path)

    def need_to_copy(self, from_path: Path, to_path: Path, expected_build_id: str) -> bool:
        """ Return True if to_path is missing, has a different build id, or is more
            stripped than from_path.
        """
        if not to_path.is_file() or self.read_build_id(to_path) != expected_build_id:
            return True
        return self.get_file_stripped_level(from_path) < self.get_file_stripped_level(to_path)

    def get_file_stripped_level(self, path: Path) -> int:
        """Return stripped level of an ELF file. Larger value means more stripped."""
        sections = self.readelf.get_sections(path)
        if '.debug_line' in sections:
            return 0
        if '.symtab' in sections:
            return 1
        return 2


class BinaryCacheBuilder:
    """Collect all binaries needed by perf.data in binary_cache."""

    def __init__(self, ndk_path: Optional[str], disable_adb_root: bool):
        self.readelf = ReadElf(ndk_path)
        self.device_source = BinarySourceFromDevice(self.readelf, disable_adb_root)
        self.binary_cache_dir = Path('binary_cache')
        self.binary_cache = BinaryCache(self.binary_cache_dir)
        # Maps binary name to build id; filled in by collect_used_binaries().
        self.binaries = {}

    def build_binary_cache(self, perf_data_path: str, symfs_dirs: List[Union[Path, str]]) -> bool:
        """ Build the binary cache for perf_data_path.

            Binaries are copied from symfs_dirs first, then pulled from the device, and
            finally the build_id_list file is written.
            Return False if one of symfs_dirs isn't a directory, True otherwise.
        """
        self.binary_cache_dir.mkdir(exist_ok=True)
        self.collect_used_binaries(perf_data_path)
        if not self.copy_binaries_from_symfs_dirs(symfs_dirs):
            return False
        self.pull_binaries_from_device()
        self.create_build_id_list()
        return True

    def collect_used_binaries(self, perf_data_path):
        """read perf.data, collect all used binaries and their build id(if available)."""
        # A dict mapping from binary name to build_id
        binaries: Dict[str, str] = {}
        # The kernel is stored under 'vmlinux' and JIT files aren't stored at all, so
        # checking `binaries` alone repeats their work on every frame. Remember each
        # dso name handled so far instead.
        seen_dso_names = set()
        lib = ReportLib()
        try:
            lib.SetRecordFile(perf_data_path)
            lib.SetLogSeverity('error')
            while True:
                sample = lib.GetNextSample()
                if sample is None:
                    break
                symbols = [lib.GetSymbolOfCurrentSample()]
                callchain = lib.GetCallChainOfCurrentSample()
                for i in range(callchain.nr):
                    symbols.append(callchain.entries[i].symbol)

                for symbol in symbols:
                    dso_name = symbol.dso_name
                    if dso_name in seen_dso_names or dso_name in binaries:
                        continue
                    seen_dso_names.add(dso_name)
                    if is_jit_symfile(dso_name):
                        continue
                    name = 'vmlinux' if dso_name == '[kernel.kallsyms]' else dso_name
                    binaries[name] = lib.GetBuildIdForPath(dso_name)
        finally:
            # Close the report lib even if reading a sample raised.
            lib.Close()
        self.binaries = binaries

    def copy_binaries_from_symfs_dirs(self, symfs_dirs: List[Union[str, Path]]) -> bool:
        """ Copy matching binaries from symfs_dirs to the binary cache.
            Return False (after logging an error) if any entry isn't a directory.
        """
        if symfs_dirs:
            lib_dirs: List[Path] = []
            for symfs_dir in symfs_dirs:
                if isinstance(symfs_dir, str):
                    symfs_dir = Path(symfs_dir)
                if not symfs_dir.is_dir():
                    logging.error("can't find dir %s", symfs_dir)
                    return False
                lib_dirs.append(symfs_dir)
            lib_dir_source = BinarySourceFromLibDirs(self.readelf, lib_dirs)
            lib_dir_source.collect_binaries(self.binaries, self.binary_cache)
        return True

    def pull_binaries_from_device(self):
        """ Pull the collected binaries from the device into the binary cache. """
        self.device_source.collect_binaries(self.binaries, self.binary_cache)

    def create_build_id_list(self):
        """ Create build_id_list. So report scripts can find a binary by its build_id instead of
            path.
        """
        build_id_list_path = self.binary_cache_dir / 'build_id_list'
        # Write in binary mode to avoid "\r\n" problem on windows, which can confuse simpleperf.
        with open(build_id_list_path, 'wb') as fh:
            for root, _, files in os.walk(self.binary_cache_dir):
                for filename in files:
                    path = Path(os.path.join(root, filename))
                    build_id = self.readelf.get_build_id(path)
                    if build_id:
                        relative_path = path.relative_to(self.binary_cache_dir)
                        line = f'{build_id}={relative_path}\n'
                        fh.write(str_to_bytes(line))

    def find_path_in_cache(self, device_path: str) -> Optional[Path]:
        """ Return the cache path for device_path, using the build id recorded for it
            in self.binaries when there is one.
        """
        build_id = self.binaries.get(device_path, '')
        return self.binary_cache.get_path_in_cache(device_path, build_id)


def main() -> bool:
    """ Parse command line arguments and build the binary cache.
        Return the result of BinaryCacheBuilder.build_binary_cache().
    """
    parser = BaseArgumentParser(description="""
        Pull binaries needed by perf.data from device to binary_cache directory.""")
    parser.add_argument('-i', '--perf_data_path', default='perf.data', type=extant_file, help="""
        The path of profiling data.""")
    parser.add_argument('-lib', '--native_lib_dir', type=extant_dir, nargs='+', help="""
        Path to find debug version of native shared libraries used in the app.""", action='append')
    parser.add_argument('--disable_adb_root', action='store_true', help="""
        Force adb to run in non root mode.""")
    parser.add_argument('--ndk_path', nargs=1, help='Find tools in the ndk path.')
    args = parser.parse_args()
    # --ndk_path uses nargs=1, so a given value arrives as a one-element list.
    ndk_path = args.ndk_path[0] if args.ndk_path else None
    builder = BinaryCacheBuilder(ndk_path, args.disable_adb_root)
    symfs_dirs = flatten_arg_list(args.native_lib_dir)
    return builder.build_binary_cache(args.perf_data_path, symfs_dirs)


if __name__ == '__main__':
    sys.exit(0 if main() else 1)