• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #!/usr/bin/env python3
2 #
3 # Copyright (C) 2016 The Android Open Source Project
4 #
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
8 #
9 #      http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16 #
17 
18 """binary_cache_builder.py: read perf.data, collect binaries needed by
19     it, and put them in binary_cache.
20 """
21 
22 from collections import defaultdict
23 import logging
24 import os
25 import os.path
26 from pathlib import Path
27 import shutil
28 import sys
29 from typing import Dict, List, Optional, Tuple, Union
30 
31 from simpleperf_report_lib import ReportLib
32 from simpleperf_utils import (
33     AdbHelper, BaseArgumentParser, extant_dir, extant_file, flatten_arg_list,
34     ReadElf, str_to_bytes)
35 
36 
def is_jit_symfile(dso_name):
    """ Return True if the file name part of dso_name (the text after the last '/')
        starts with 'TemporaryFile'.
    """
    _, _, basename = dso_name.rpartition('/')
    return basename.startswith('TemporaryFile')
39 
40 
class BinaryCache:
    """ Computes where a binary recorded in perf.data is stored under the cache directory. """

    def __init__(self, binary_dir: Path):
        # Root directory of the cache; all returned paths are below it.
        self.binary_dir = binary_dir

    def get_path_in_cache(self, device_path: str, build_id: str) -> Path:
        """ Return the path in the cache for a binary path recorded in perf.data.

            device_path: '/'-separated path of the binary in perf.data.
            build_id: build id of the binary, or an empty value if it has none.
        """
        if not build_id:
            # An elf file without build id can only be stored by mirroring its path on
            # device, otherwise simpleperf can't find it. This layout isn't preferred:
            # 1) It doesn't work for native libs loaded directly from apk
            #    (android:extractNativeLibs="false").
            # 2) It may exceed path limit on windows.
            relative_path = device_path[1:] if device_path.startswith('/') else device_path
            return Path(os.path.join(self.binary_dir, relative_path.replace('/', os.sep)))

        # Prefix the file name with the build id (minus its first two characters) so
        # that the name is unique in the cache.
        basename = device_path.rsplit('/', 1)[-1]
        return self.binary_dir / f'{build_id[2:]}-{basename}'
63 
64 
class BinarySource:
    """ Source to find debug binaries.

        Base class: collect_binaries() must be overridden by subclasses.
    """

    def __init__(self, readelf: ReadElf):
        # Helper used by read_build_id() to query the build id of a file.
        self.readelf = readelf

    def collect_binaries(self, binaries: Dict[str, str], binary_cache: BinaryCache):
        """ Pull binaries needed in perf.data to binary_cache.

            binaries: maps from binary path to its build_id in perf.data.
            binary_cache: the cache receiving the binaries.

            Raises NotImplementedError when a subclass doesn't override it.
            NotImplementedError derives from Exception, so callers catching
            Exception keep working.
        """
        raise NotImplementedError('not implemented')

    def read_build_id(self, path: Path):
        """ Return the build id that readelf reports for the file at path. """
        return self.readelf.get_build_id(path)
79 
80 
class BinarySourceFromDevice(BinarySource):
    """ Pull binaries from device. """

    def __init__(self, readelf: ReadElf, disable_adb_root: bool):
        super().__init__(readelf)
        # Switching adb to root is allowed unless the caller disabled it.
        self.adb = AdbHelper(enable_switch_to_root=not disable_adb_root)

    def collect_binaries(self, binaries: Dict[str, str], binary_cache: BinaryCache):
        """ Pull every binary in binaries, then the kernel symbols, to binary_cache.

            binaries: maps from binary path to its build_id in perf.data.
            Does nothing when no device is available.
        """
        if not self.adb.is_device_available():
            return
        for path, build_id in binaries.items():
            self.collect_binary(path, build_id, binary_cache)
        self.pull_kernel_symbols(binary_cache.binary_dir / 'kallsyms')

    def collect_binary(self, path: str, build_id: str, binary_cache: BinaryCache):
        """ Pull one binary to its place in binary_cache, skipping paths that
            can't be pulled as files.
        """
        if not path.startswith('/') or path == "//anon" or path.startswith("/dev/"):
            # [kernel.kallsyms] or unknown, or something we can't find binary.
            return
        binary_cache_file = binary_cache.get_path_in_cache(path, build_id)
        self.check_and_pull_binary(path, build_id, binary_cache_file)

    def check_and_pull_binary(self, path: str, expected_build_id: str, binary_cache_file: Path):
        """If the binary_cache_file exists and has the expected_build_id, there
           is no need to pull the binary from device. Otherwise, pull it.
        """
        if binary_cache_file.is_file() and (
                not expected_build_id or expected_build_id == self.read_build_id(binary_cache_file)
        ):
            logging.info('use current file in binary_cache: %s', binary_cache_file)
            return
        logging.info('pull file to binary_cache: %s to %s', path, binary_cache_file)
        # exist_ok avoids failing if the directory appears between a check and the creation.
        binary_cache_file.parent.mkdir(parents=True, exist_ok=True)
        if binary_cache_file.is_file():
            # Remove the stale file (its build id didn't match) before pulling.
            binary_cache_file.unlink()
        self.pull_file_from_device(path, binary_cache_file)

    def pull_file_from_device(self, device_path: str, host_path: Path) -> bool:
        """ Pull device_path to host_path. Return True on success, False otherwise
            (a warning is logged on failure).
        """
        # Always hand adb plain strings, for both the direct and the fallback pull.
        host_path_str = str(host_path)
        if self.adb.run(['pull', device_path, host_path_str]):
            return True
        # On non-root devices, we can't pull /data/app/XXX/base.odex directly.
        # Instead, we can first copy the file to /data/local/tmp, then pull it.
        filename = device_path.rsplit('/', 1)[-1]
        tmp_path = '/data/local/tmp/' + filename
        if self.adb.run(['shell', 'cp', device_path, '/data/local/tmp']):
            pulled = self.adb.run(['pull', tmp_path, host_path_str])
            # Remove the temporary copy even when the pull failed, so it doesn't
            # pile up on the device.
            self.adb.run(['shell', 'rm', tmp_path])
            if pulled:
                return True
        logging.warning('failed to pull %s from device', device_path)
        return False

    def pull_kernel_symbols(self, file_path: Path):
        """ Replace file_path with a fresh copy of /proc/kallsyms. The pull is only
            attempted when adb can switch to root; any old file is removed either way.
        """
        if file_path.is_file():
            file_path.unlink()
        if self.adb.switch_to_root():
            self.adb.run(['shell', 'echo', '0', '>/proc/sys/kernel/kptr_restrict'])
            self.adb.run(['pull', '/proc/kallsyms', str(file_path)])
138 
139 
class BinarySourceFromLibDirs(BinarySource):
    """ Find binaries in lib dirs on host and copy matching ones to the binary cache. """

    def __init__(self, readelf: ReadElf, lib_dirs: List[Path]):
        super().__init__(readelf)
        self.lib_dirs = lib_dirs
        # The three fields below are filled in by collect_binaries().
        self.filename_map = None
        self.build_id_map = None
        self.binary_cache = None

    def collect_binaries(self, binaries: Dict[str, str], binary_cache: BinaryCache):
        """ Search all lib dirs and copy files matching binaries to binary_cache.

            binaries: maps from binary path to its build_id in perf.data.
        """
        self.create_filename_map(binaries)
        self.create_build_id_map(binaries)
        self.binary_cache = binary_cache

        for lib_dir in self.lib_dirs:
            # Pick the search strategy fitting the kind of directory.
            if not self.is_platform_symbols_dir(lib_dir):
                self.search_dir(lib_dir)
            else:
                self.search_platform_symbols_dir(lib_dir)

    def create_filename_map(self, binaries: Dict[str, str]):
        """ Build filename_map: filename -> list of (path, build_id) having that name. """
        self.filename_map = defaultdict(list)
        for device_path, build_id in binaries.items():
            basename = device_path.rsplit('/', 1)[-1]
            self.filename_map[basename].append((device_path, build_id))

    def create_build_id_map(self, binaries: Dict[str, str]):
        """ Build build_id_map: build id -> binary path, for binaries having a build id. """
        self.build_id_map = {
            build_id: device_path for device_path, build_id in binaries.items() if build_id
        }

    def is_platform_symbols_dir(self, lib_dir: Path):
        """ Check if lib_dir points to $ANDROID_PRODUCT_OUT/symbols: it is named
            'symbols' and has an entry named 'system'.
        """
        entry_names = [entry.name for entry in lib_dir.iterdir()]
        return lib_dir.name == 'symbols' and 'system' in entry_names

    def search_platform_symbols_dir(self, lib_dir: Path):
        """ Platform symbols dir contains too many binaries, and reading build ids
            for all of them takes a long time. So build ids are read only for files
            whose names appear in filename_map.
        """
        for dirpath, _, filenames in os.walk(lib_dir):
            for filename in filenames:
                candidates = self.filename_map.get(filename)
                if candidates:
                    host_file = Path(os.path.join(dirpath, filename))
                    build_id = self.read_build_id(host_file)
                    for device_path, expected_build_id in candidates:
                        if build_id == expected_build_id:
                            self.copy_to_binary_cache(host_file, build_id, device_path)

    def search_dir(self, lib_dir: Path):
        """ A normal lib dir is unlikely to contain many binaries, so build ids are
            read for all files in it. Users may give debug binaries with a name
            different from the one recorded in perf.data, so matching relies only on
            the build id whenever the file has one.
        """
        for dirpath, _, filenames in os.walk(lib_dir):
            for filename in filenames:
                host_file = Path(os.path.join(dirpath, filename))
                build_id = self.read_build_id(host_file)
                if build_id:
                    # File with build id: match by build id only.
                    device_path = self.build_id_map.get(build_id)
                    if device_path:
                        self.copy_to_binary_cache(host_file, build_id, device_path)
                    continue
                if not self.readelf.is_elf_file(host_file):
                    continue
                # Elf file without build id: match by filename against the first
                # recorded binary that has no build id either.
                same_name = self.filename_map.get(filename, [])
                device_path = next(
                    (path for path, expected_build_id in same_name if not expected_build_id),
                    None)
                if device_path is not None:
                    self.copy_to_binary_cache(host_file, '', device_path)

    def copy_to_binary_cache(
            self, from_path: Path, expected_build_id: str, device_path: str):
        """ Copy from_path to the cache location of device_path, unless need_to_copy()
            says the file already in the cache should be kept.
        """
        to_path = self.binary_cache.get_path_in_cache(device_path, expected_build_id)
        if self.need_to_copy(from_path, to_path, expected_build_id):
            parent_dir = to_path.parent
            if not parent_dir.is_dir():
                os.makedirs(parent_dir)
            logging.info('copy to binary_cache: %s to %s', from_path, to_path)
            shutil.copy(from_path, to_path)

    def need_to_copy(self, from_path: Path, to_path: Path, expected_build_id: str):
        """ Return True if to_path is missing, has a different build id, or is more
            stripped than from_path.
        """
        if to_path.is_file() and self.read_build_id(to_path) == expected_build_id:
            # Both files are candidates: prefer the less stripped one.
            return (self.get_file_stripped_level(from_path) <
                    self.get_file_stripped_level(to_path))
        return True

    def get_file_stripped_level(self, path: Path) -> int:
        """Return stripped level of an ELF file. Larger value means more stripped."""
        sections = self.readelf.get_sections(path)
        # Level 0: has .debug_line; level 1: has .symtab; level 2: neither.
        for level, section_name in enumerate(('.debug_line', '.symtab')):
            if section_name in sections:
                return level
        return 2
245 
246 
class BinaryCacheBuilder:
    """Collect all binaries needed by perf.data in binary_cache."""

    def __init__(self, ndk_path: Optional[str], disable_adb_root: bool):
        self.readelf = ReadElf(ndk_path)
        self.device_source = BinarySourceFromDevice(self.readelf, disable_adb_root)
        # The cache always lives in 'binary_cache' relative to the current directory.
        self.binary_cache_dir = Path('binary_cache')
        self.binary_cache = BinaryCache(self.binary_cache_dir)
        # Maps from binary name to build id; filled by collect_used_binaries().
        self.binaries: Dict[str, str] = {}

    def build_binary_cache(self, perf_data_path: str, symfs_dirs: List[Union[Path, str]]) -> bool:
        """ Build the binary cache for perf_data_path.

            Binaries are first copied from symfs_dirs, then pulled from device, and
            finally build_id_list is written. Return False if one of symfs_dirs isn't
            a directory, True otherwise.
        """
        self.binary_cache_dir.mkdir(exist_ok=True)
        self.collect_used_binaries(perf_data_path)
        if not self.copy_binaries_from_symfs_dirs(symfs_dirs):
            return False
        self.pull_binaries_from_device()
        self.create_build_id_list()
        return True

    def collect_used_binaries(self, perf_data_path):
        """read perf.data, collect all used binaries and their build id(if available)."""
        # A dict mapping from binary name to build_id
        binaries: Dict[str, str] = {}
        lib = ReportLib()
        # NOTE(review): assumes ReportLib.Close() is safe to call after a failure in
        # the calls below - confirm against simpleperf_report_lib.
        try:
            lib.SetRecordFile(perf_data_path)
            lib.SetLogSeverity('error')
            while lib.GetNextSample() is not None:
                symbols = [lib.GetSymbolOfCurrentSample()]
                callchain = lib.GetCallChainOfCurrentSample()
                # Only the first callchain.nr entries are read, hence the index loop.
                for i in range(callchain.nr):
                    symbols.append(callchain.entries[i].symbol)

                for symbol in symbols:
                    dso_name = symbol.dso_name
                    if is_jit_symfile(dso_name):
                        continue
                    name = 'vmlinux' if dso_name == '[kernel.kallsyms]' else dso_name
                    # Test the key that is actually stored, so the kernel build id
                    # isn't queried again for every kernel symbol.
                    if name not in binaries:
                        binaries[name] = lib.GetBuildIdForPath(dso_name)
        finally:
            # Close the report lib even if reading perf.data raised.
            lib.Close()
        self.binaries = binaries

    def copy_binaries_from_symfs_dirs(self, symfs_dirs: List[Union[str, Path]]) -> bool:
        """ Copy binaries matching self.binaries from symfs_dirs to the binary cache.

            Return False (after logging an error) if an entry of symfs_dirs isn't a
            directory; True otherwise, including when symfs_dirs is empty.
        """
        if not symfs_dirs:
            return True
        lib_dirs: List[Path] = []
        for symfs_dir in symfs_dirs:
            lib_dir = Path(symfs_dir)
            if not lib_dir.is_dir():
                logging.error("can't find dir %s", symfs_dir)
                return False
            lib_dirs.append(lib_dir)
        lib_dir_source = BinarySourceFromLibDirs(self.readelf, lib_dirs)
        lib_dir_source.collect_binaries(self.binaries, self.binary_cache)
        return True

    def pull_binaries_from_device(self):
        """ Let the device source collect the binaries in self.binaries. """
        self.device_source.collect_binaries(self.binaries, self.binary_cache)

    def create_build_id_list(self):
        """ Create build_id_list. So report scripts can find a binary by its build_id instead of
            path.
        """
        build_id_list_path = self.binary_cache_dir / 'build_id_list'
        # Write in binary mode to avoid "\r\n" problem on windows, which can confuse simpleperf.
        with open(build_id_list_path, 'wb') as fh:
            for root, _, files in os.walk(self.binary_cache_dir):
                for filename in files:
                    path = Path(os.path.join(root, filename))
                    build_id = self.readelf.get_build_id(path)
                    if not build_id:
                        continue
                    # One "<build_id>=<path relative to the cache dir>" line per binary.
                    relative_path = path.relative_to(self.binary_cache_dir)
                    line = f'{build_id}={relative_path}\n'
                    fh.write(str_to_bytes(line))

    def find_path_in_cache(self, device_path: str) -> Optional[Path]:
        """ Return the path in the cache for device_path, using the build id recorded
            by collect_used_binaries() when there is one.
        """
        # Unknown binaries are looked up as having no build id.
        build_id = self.binaries.get(device_path, '')
        return self.binary_cache.get_path_in_cache(device_path, build_id)
328 
329 
def main() -> bool:
    """ Parse the command line and build binary_cache for the given perf.data.

        Returns the result of BinaryCacheBuilder.build_binary_cache().
    """
    arg_parser = BaseArgumentParser(description="""
        Pull binaries needed by perf.data from device to binary_cache directory.""")
    arg_parser.add_argument(
        '-i', '--perf_data_path', type=extant_file, default='perf.data', help="""
        The path of profiling data.""")
    # The option may be repeated and each use takes one or more dirs, so the parsed
    # value is a list of lists (flattened below).
    arg_parser.add_argument(
        '-lib', '--native_lib_dir', action='append', nargs='+', type=extant_dir, help="""
        Path to find debug version of native shared libraries used in the app.""")
    arg_parser.add_argument('--disable_adb_root', action='store_true', help="""
        Force adb to run in non root mode.""")
    arg_parser.add_argument('--ndk_path', nargs=1, help='Find tools in the ndk path.')
    options = arg_parser.parse_args()

    # --ndk_path uses nargs=1, so a given value arrives as a one-element list.
    ndk_path = options.ndk_path[0] if options.ndk_path else None
    cache_builder = BinaryCacheBuilder(ndk_path, options.disable_adb_root)
    lib_dirs = flatten_arg_list(options.native_lib_dir)
    return cache_builder.build_binary_cache(options.perf_data_path, lib_dirs)
345 
346 
if __name__ == '__main__':
    # Exit status: 0 when main() reports success, 1 otherwise.
    succeeded = main()
    sys.exit(0 if succeeded else 1)
349