• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2#
3# Copyright (C) 2016 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17
18"""utils.py: export utility functions.
19"""
20
21from __future__ import annotations
22import argparse
23from concurrent.futures import Future, ThreadPoolExecutor
24from dataclasses import dataclass
25import logging
26import os
27import os.path
28from pathlib import Path
29import re
30import shutil
31import subprocess
32import sys
33import time
34from typing import Any, Dict, Iterator, List, Optional, Set, Tuple, Union
35
36
# Hint appended to error messages when a tool shipped with the NDK can't be found.
NDK_ERROR_MESSAGE = (
    "Please install the Android NDK "
    "(https://developer.android.com/studio/projects/install-ndk), "
    "then set NDK path with --ndk_path option."
)
38
39
def get_script_dir() -> str:
    """ Return the directory holding this file, with symlinks resolved. """
    script_path = os.path.realpath(__file__)
    return os.path.dirname(script_path)
42
43
def is_windows() -> bool:
    """ Return True when the host is Windows (native win32 or cygwin). """
    return sys.platform in ('win32', 'cygwin')
46
47
def is_darwin() -> bool:
    """ Return True when sys.platform reports 'darwin'. """
    host_platform = sys.platform
    return host_platform == 'darwin'
50
51
def get_platform() -> str:
    """ Return the host platform name: 'windows', 'darwin' or 'linux'.

        Anything that is neither windows nor darwin is reported as 'linux'.
    """
    if is_windows():
        return 'windows'
    return 'darwin' if is_darwin() else 'linux'
58
59
def str_to_bytes(str_value: str) -> bytes:
    """ Encode a python str as utf-8 bytes.

        Python 3 strings are wide strings, while the C api expects 8 bit strings, so a
        conversion is needed. utf-8 is the encoding used for now.
    """
    return bytes(str_value, encoding='utf-8')
64
65
def bytes_to_str(bytes_value: Optional[bytes]) -> str:
    """ Decode utf-8 bytes to a str. None and empty input both give ''. """
    return bytes_value.decode('utf-8') if bytes_value else ''
70
71
def get_target_binary_path(arch: str, binary_name: str) -> str:
    """ Return the path of a prebuilt device binary under <script_dir>/bin/android/<arch>.

        'aarch64' is accepted as an alias of 'arm64'. log_fatal() is called when either
        the arch directory or the binary itself is missing.
    """
    arch_name = 'arm64' if arch == 'aarch64' else arch
    arch_dir = os.path.join(get_script_dir(), "bin", "android", arch_name)
    if not os.path.isdir(arch_dir):
        log_fatal("can't find arch directory: %s" % arch_dir)
    binary_path = os.path.join(arch_dir, binary_name)
    if not os.path.isfile(binary_path):
        log_fatal("can't find binary: %s" % binary_path)
    return binary_path
82
83
def get_host_binary_path(binary_name: str) -> str:
    """ Return the path of a prebuilt host binary under <script_dir>/bin/<platform>/<bits>.

        The name is adapted to the host: on windows a '.so' suffix becomes '.dll' and a
        name without any '.' gets '.exe' appended; on darwin a '.so' suffix becomes
        '.dylib'. log_fatal() is called when the resulting file doesn't exist.
    """
    is_shared_lib = binary_name.endswith('.so')
    if is_windows():
        platform_dir = 'windows'
        if is_shared_lib:
            binary_name = binary_name[:-3] + '.dll'
        elif '.' not in binary_name:
            binary_name += '.exe'
    elif sys.platform == 'darwin':  # OSX
        platform_dir = 'darwin'
        if is_shared_lib:
            binary_name = binary_name[:-3] + '.dylib'
    else:
        platform_dir = 'linux'
    # Pick the 64-bit or 32-bit directory from the word size of the running interpreter.
    bits_dir = 'x86_64' if sys.maxsize > 2 ** 32 else 'x86'
    binary_path = os.path.join(get_script_dir(), 'bin', platform_dir, bits_dir, binary_name)
    if not os.path.isfile(binary_path):
        log_fatal("can't find binary: %s" % binary_path)
    return binary_path
103
104
def is_executable_available(executable: str, option='--help') -> bool:
    """ Run `executable option` and report whether it exited with status 0.

        Output of the probe is captured and discarded. Failure to start the process
        (OSError, e.g. the file doesn't exist) counts as not available.
    """
    try:
        completed = subprocess.run([executable, option], stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE)
    except OSError:
        return False
    return completed.returncode == 0
114
115
class ToolFinder:
    """ Find tools in ndk or sdk.

        find_tool_path() looks for a tool, in order, in the clang prebuilts of an Android
        platform checkout (llvm tools on linux only), in each candidate NDK/SDK location
        from find_ndk_and_sdk_paths(), and finally in $PATH.
    """
    # Default SDK installation directory of each platform, relative to the home directory.
    DEFAULT_SDK_PATH = {
        'darwin': 'Library/Android/sdk',
        'linux': 'Android/Sdk',
        'windows': 'AppData/Local/Android/sdk',
    }

    # Lookup info of each known tool:
    #   is_binutils: True if the tool name gets an arch prefix in the NDK.
    #   test_option: option used to probe that the tool runs ('--help' if omitted).
    #   path_in_sdk: path of the tool relative to the SDK directory.
    #   path_in_ndk: function from platform name to the tool path relative to the NDK
    #                directory.
    EXPECTED_TOOLS = {
        'adb': {
            'is_binutils': False,
            'test_option': 'version',
            'path_in_sdk': 'platform-tools/adb',
        },
        'llvm-objdump': {
            'is_binutils': False,
            'path_in_ndk':
                lambda platform: 'toolchains/llvm/prebuilt/%s-x86_64/bin/llvm-objdump' % platform,
        },
        'llvm-readelf': {
            'is_binutils': False,
            'path_in_ndk':
                lambda platform: 'toolchains/llvm/prebuilt/%s-x86_64/bin/llvm-readelf' % platform,
        },
        'llvm-symbolizer': {
            'is_binutils': False,
            'path_in_ndk':
                lambda platform: 'toolchains/llvm/prebuilt/%s-x86_64/bin/llvm-symbolizer' % platform,
        },
        'llvm-strip': {
            'is_binutils': False,
            'path_in_ndk':
                lambda platform: 'toolchains/llvm/prebuilt/%s-x86_64/bin/llvm-strip' % platform,
        },
    }

    @staticmethod
    def _get_home_dir() -> Optional[str]:
        """ Return the user's home directory from the environment, or None if unknown. """
        if is_windows():
            home_drive = os.environ.get('HOMEDRIVE')
            home_path = os.environ.get('HOMEPATH')
            # Either variable can be unset (e.g. under cygwin); don't concatenate None.
            if home_drive and home_path:
                return home_drive + home_path
            return None
        return os.environ.get('HOME')

    @staticmethod
    def _ndk_version_key(name: str) -> Tuple[int, Any]:
        """ Sort key ordering ndk directory names by numeric version.

            Dotted numeric names compare component by component as integers, so
            '21.10.1' is newer than '21.4.7075529' and '9.0.1'. Any other name sorts
            before all numeric versions.
        """
        try:
            return (1, tuple(int(part) for part in name.split('.')))
        except ValueError:
            return (0, name)

    @classmethod
    def _find_newest_ndk_dir(cls, ndk_root: str) -> Optional[str]:
        """ Return the newest versioned ndk directory inside ndk_root, or None. """
        if not os.path.isdir(ndk_root):
            return None
        # Only sub directories can be ndk installations; skip stray files.
        versions = [name for name in os.listdir(ndk_root)
                    if os.path.isdir(os.path.join(ndk_root, name))]
        if not versions:
            return None
        return os.path.join(ndk_root, max(versions, key=cls._ndk_version_key))

    @classmethod
    def find_ndk_and_sdk_paths(cls, ndk_path: Optional[str] = None
                               ) -> Iterator[Tuple[Optional[str], Optional[str]]]:
        """ Yield candidate (ndk_path, sdk_path) pairs, most preferred first.

            sdk_path is None when no sdk is found next to the ndk candidate.
        """
        # Use the given ndk path.
        if ndk_path and os.path.isdir(ndk_path):
            ndk_path = os.path.abspath(ndk_path)
            yield ndk_path, cls.find_sdk_path(ndk_path)
        # Find ndk in the parent directory containing simpleperf scripts.
        ndk_path = os.path.dirname(os.path.abspath(get_script_dir()))
        yield ndk_path, cls.find_sdk_path(ndk_path)
        # Find ndk in the default sdk installation path.
        home = cls._get_home_dir()
        if home:
            platform = get_platform()
            sdk_path = os.path.join(home, cls.DEFAULT_SDK_PATH[platform].replace('/', os.sep))
            if os.path.isdir(sdk_path):
                # Android Studio can install multiple ndk versions in 'ndk'.
                # Find the newest one.
                newest_ndk = cls._find_newest_ndk_dir(os.path.join(sdk_path, 'ndk'))
                if newest_ndk:
                    yield newest_ndk, sdk_path
            ndk_path = os.path.join(sdk_path, 'ndk-bundle')
            if os.path.isdir(ndk_path):
                yield ndk_path, sdk_path

    @classmethod
    def find_sdk_path(cls, ndk_path: str) -> Optional[str]:
        """ Return the nearest of the two closest ancestors of ndk_path that contains
            'platform-tools', or None if neither does.
        """
        path = ndk_path
        for _ in range(2):
            path = os.path.dirname(path)
            if os.path.isdir(os.path.join(path, 'platform-tools')):
                return path
        return None

    @classmethod
    def _get_binutils_path_in_ndk(cls, toolname: str, arch: Optional[str], platform: str
                                  ) -> Tuple[str, str]:
        """ Return (tool name with arch prefix, tool path relative to the ndk directory).

            arch defaults to 'arm64'. log_fatal() is called for an unknown arch.
        """
        if not arch:
            arch = 'arm64'
        if arch == 'arm64':
            name = 'aarch64-linux-android-' + toolname
        elif arch == 'arm':
            name = 'arm-linux-androideabi-' + toolname
        elif arch == 'x86_64':
            name = 'x86_64-linux-android-' + toolname
        elif arch == 'x86':
            name = 'i686-linux-android-' + toolname
        else:
            log_fatal('unexpected arch %s' % arch)
        path = 'toolchains/llvm/prebuilt/%s-x86_64/bin/%s' % (platform, name)
        return (name, path)

    @classmethod
    def find_tool_path(cls, toolname: str, ndk_path: Optional[str] = None,
                       arch: Optional[str] = None) -> Optional[str]:
        """ Return a runnable path (or bare command name found via $PATH) for toolname.

            Returns None when toolname isn't in EXPECTED_TOOLS or no working copy is
            found. A candidate is accepted only if running it with the tool's test option
            succeeds.
        """
        tool_info = cls.EXPECTED_TOOLS.get(toolname)
        if not tool_info:
            return None

        is_binutils = tool_info['is_binutils']
        test_option = tool_info.get('test_option', '--help')
        platform = get_platform()

        # Find tool in clang prebuilts in Android platform.
        if toolname.startswith('llvm-') and platform == 'linux' and get_script_dir().endswith(
                'system/extras/simpleperf/scripts'):
            path = str(
                Path(get_script_dir()).parents[3] / 'prebuilts' / 'clang' / 'host' / 'linux-x86' /
                'llvm-binutils-stable' / toolname)
            if is_executable_available(path, test_option):
                return path

        # Find tool in NDK or SDK.
        path_in_ndk = None
        path_in_sdk = None
        if is_binutils:
            toolname_with_arch, path_in_ndk = cls._get_binutils_path_in_ndk(
                toolname, arch, platform)
        else:
            toolname_with_arch = toolname
            if 'path_in_ndk' in tool_info:
                path_in_ndk = tool_info['path_in_ndk'](platform)
            elif 'path_in_sdk' in tool_info:
                path_in_sdk = tool_info['path_in_sdk']
        if path_in_ndk:
            path_in_ndk = path_in_ndk.replace('/', os.sep)
        elif path_in_sdk:
            path_in_sdk = path_in_sdk.replace('/', os.sep)

        for ndk_dir, sdk_dir in cls.find_ndk_and_sdk_paths(ndk_path):
            if path_in_ndk and ndk_dir:
                path = os.path.join(ndk_dir, path_in_ndk)
                if is_executable_available(path, test_option):
                    return path
            elif path_in_sdk and sdk_dir:
                path = os.path.join(sdk_dir, path_in_sdk)
                if is_executable_available(path, test_option):
                    return path

        # Find tool in $PATH.
        if is_executable_available(toolname_with_arch, test_option):
            return toolname_with_arch

        # Find tool without arch in $PATH.
        if is_binutils and tool_info.get('accept_tool_without_arch'):
            if is_executable_available(toolname, test_option):
                return toolname
        return None
268
269
class AdbHelper:
    """ Run adb commands against a device.

        adb is located through ToolFinder. When serial_number is set, it is passed to adb
        through the ANDROID_SERIAL environment variable.
    """

    def __init__(self, enable_switch_to_root: bool = True):
        found_adb = ToolFinder.find_tool_path('adb')
        if not found_adb:
            log_exit("Can't find adb in PATH environment.")
        self.adb_path: str = found_adb
        self.enable_switch_to_root = enable_switch_to_root
        self.serial_number: Optional[str] = None

    def is_device_available(self) -> bool:
        """ Return True if `adb shell whoami` succeeds. """
        succeeded, _ = self.run_and_return_output(['shell', 'whoami'])
        return succeeded

    def run(self, adb_args: List[str], log_output: bool = False, log_stderr: bool = False) -> bool:
        """ Run an adb command and return whether it exited with status 0. """
        succeeded, _ = self.run_and_return_output(adb_args, log_output, log_stderr)
        return succeeded

    def run_and_return_output(self, adb_args: List[str], log_output: bool = False,
                              log_stderr: bool = False) -> Tuple[bool, str]:
        """ Run an adb command and return (exited with status 0, decoded stdout).

            stdout is logged at debug level if log_output is set, stderr at warning level
            if log_stderr is set.
        """
        full_cmd = [self.adb_path] + adb_args
        logging.debug('run adb cmd: %s' % full_cmd)
        child_env = None
        if self.serial_number:
            # Select the device through the environment instead of a -s option.
            child_env = dict(os.environ, ANDROID_SERIAL=self.serial_number)
        proc = subprocess.Popen(
            full_cmd, env=child_env, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        raw_stdout, raw_stderr = proc.communicate()
        stdout_text = bytes_to_str(raw_stdout)
        stderr_text = bytes_to_str(raw_stderr)
        succeeded = (proc.returncode == 0)
        if log_output and stdout_text:
            logging.debug(stdout_text)
        if log_stderr and stderr_text:
            logging.warning(stderr_text)
        logging.debug('run adb cmd: %s  [result %s]' % (full_cmd, succeeded))
        return (succeeded, stdout_text)

    def check_run(self, adb_args: List[str], log_output: bool = False):
        """ Run an adb command, calling log_exit() if it fails. """
        self.check_run_and_return_output(adb_args, log_output)

    def check_run_and_return_output(self, adb_args: List[str], log_output: bool = False,
                                    log_stderr: bool = False) -> str:
        """ Run an adb command and return its stdout, calling log_exit() if it fails.

            NOTE: the log_stderr argument is not consulted; stderr is always logged.
        """
        succeeded, stdout_text = self.run_and_return_output(adb_args, log_output, True)
        if not succeeded:
            log_exit('run "adb %s" failed: %s' % (adb_args, stdout_text))
        return stdout_text

    def _unroot(self):
        """ Restart adbd without root, if `whoami` currently reports root. """
        succeeded, whoami = self.run_and_return_output(['shell', 'whoami'])
        if not succeeded or 'root' not in whoami:
            return
        logging.info('unroot adb')
        self.run(['unroot'])
        time.sleep(1)
        self.run(['wait-for-device'])

    def switch_to_root(self) -> bool:
        """ Try to make adb run as root; return True if it does afterwards.

            When switching to root is disabled, adb is unrooted instead and False is
            returned. No attempt is made on 'user' builds.
        """
        if not self.enable_switch_to_root:
            self._unroot()
            return False
        succeeded, whoami = self.run_and_return_output(['shell', 'whoami'])
        if not succeeded:
            return False
        if 'root' in whoami:
            return True
        if self.get_property('ro.build.type') == 'user':
            return False
        self.run(['root'])
        time.sleep(1)
        self.run(['wait-for-device'])
        succeeded, whoami = self.run_and_return_output(['shell', 'whoami'])
        return succeeded and 'root' in whoami

    def get_property(self, name: str) -> Optional[str]:
        """ Return the stripped value of a device property, or None if getprop fails. """
        succeeded, value = self.run_and_return_output(['shell', 'getprop', name])
        if not succeeded:
            return None
        return value.strip()

    def set_property(self, name: str, value: str) -> bool:
        """ Set a device property; return whether setprop succeeded. """
        return self.run(['shell', 'setprop', name, value])

    def get_device_arch(self) -> str:
        """ Return 'arm64', 'arm', 'x86_64' or 'x86' based on `uname -m` on device. """
        uname_output = self.check_run_and_return_output(['shell', 'uname', '-m'])
        # Order matters: 'x86_64' must be tested before the looser '86' marker.
        arch_markers = (
            ('aarch64', 'arm64'),
            ('arm', 'arm'),
            ('x86_64', 'x86_64'),
            ('86', 'x86'),
        )
        for marker, arch in arch_markers:
            if marker in uname_output:
                return arch
        log_fatal('unsupported architecture: %s' % uname_output.strip())
        return ''

    def get_android_version(self) -> int:
        """ Get Android version on device, like 7 is for Android N, 8 is for Android O.

            The codename property is used unless it is missing or 'REL', in which case
            the release property is used. Returns 0 if no version can be derived.
        """
        version_text = self.get_property('ro.build.version.codename')
        if not version_text or version_text == 'REL':
            version_text = self.get_property('ro.build.version.release')
        if not version_text:
            return 0
        first_char = version_text[0]
        if first_char.isdigit():
            # Numeric release like '12.1': take the leading run of digits.
            end = 1
            while end < len(version_text) and version_text[end].isdigit():
                end += 1
            return int(version_text[:end])
        # Codename: map the first letter to a version, with 'L' being 5.
        letter = first_char.upper()
        if letter.isupper() and letter >= 'L':
            return ord(letter) - ord('L') + 5
        return 0
383
384
def flatten_arg_list(arg_list: List[List[str]]) -> List[str]:
    """ Concatenate a list of argument lists into one flat list.

        A None or empty arg_list gives an empty list.
    """
    if not arg_list:
        return []
    return [arg for group in arg_list for arg in group]
391
392
def remove(dir_or_file: Union[Path, str]):
    """ Delete a file, or a directory tree (ignoring errors inside the tree).

        Paths that are neither a file nor a directory are left alone.
    """
    if os.path.isfile(dir_or_file):
        os.remove(dir_or_file)
        return
    if os.path.isdir(dir_or_file):
        shutil.rmtree(dir_or_file, ignore_errors=True)
398
399
def open_report_in_browser(report_path: str):
    """ Open a report file in a web browser.

        On darwin the `open` command is tried first. Otherwise (or if `open` exits with
        an error) Chrome is tried through webbrowser, falling back to the default browser.
    """
    if is_darwin():
        # On darwin 10.12.6, webbrowser can't open browser, so try `open` cmd first.
        try:
            subprocess.check_call(['open', report_path])
        except subprocess.CalledProcessError:
            pass
        else:
            return
    import webbrowser
    try:
        # Try to open the report with Chrome
        webbrowser.get('google-chrome').open(report_path, new=0, autoraise=True)
    except webbrowser.Error:
        # webbrowser.get() doesn't work well on darwin/windows.
        webbrowser.open_new_tab(report_path)
416
417
class BinaryFinder:
    """ Locate the binary file for a dso path seen in a record file.

        Binaries are looked up in an optional binary cache directory (by build id through
        its 'build_id_list' file, then by path), and finally at the dso path itself.
        readelf is used to check that a candidate is an ELF file with the expected
        build id.
    """

    def __init__(self, binary_cache_dir: Optional[Union[Path, str]], readelf: ReadElf):
        if isinstance(binary_cache_dir, str):
            binary_cache_dir = Path(binary_cache_dir)
        self.binary_cache_dir = binary_cache_dir
        self.readelf = readelf
        self.build_id_map = self._load_build_id_map()

    def _load_build_id_map(self) -> Dict[str, Path]:
        """ Read <binary_cache_dir>/build_id_list into a map from build id to path.

            Returns an empty map if there is no cache directory or no list file.
        """
        build_id_map: Dict[str, Path] = {}
        if not self.binary_cache_dir:
            return build_id_map
        build_id_list_file = self.binary_cache_dir / 'build_id_list'
        if not build_id_list_file.is_file():
            return build_id_map
        with open(build_id_list_file, encoding='utf-8') as fh:
            for line in fh:
                # lines are in format "<build_id>=<path_in_binary_cache>".
                # Split on the first '=' only, so paths containing '=' are kept.
                build_id, separator, path_in_cache = line.strip().partition('=')
                if separator and build_id and path_in_cache:
                    build_id_map[build_id] = self.binary_cache_dir / path_in_cache
        return build_id_map

    def find_binary(self, dso_path_in_record_file: str,
                    expected_build_id: Optional[str]) -> Optional[Path]:
        """ If expected_build_id is None, don't check build id.
            Otherwise, the build id of the found binary should match the expected one.
            Returns None if no matching binary is found."""
        # Find binary from build id map.
        if expected_build_id:
            path = self.build_id_map.get(expected_build_id)
            if path and self._check_path(path, expected_build_id):
                return path
        # Find binary by path in binary cache.
        if self.binary_cache_dir:
            # Strip every leading '/', so the lookup always stays inside the cache dir
            # (joining an absolute path would discard the cache dir), and a path without
            # a leading '/' doesn't lose its first character.
            path_in_cache = dso_path_in_record_file.lstrip('/').replace('/', os.sep)
            path = self.binary_cache_dir / path_in_cache
            if self._check_path(path, expected_build_id):
                return path
        # Find binary by its absolute path.
        path = Path(dso_path_in_record_file)
        if self._check_path(path, expected_build_id):
            return path
        return None

    def _check_path(self, path: Path, expected_build_id: Optional[str]) -> bool:
        """ Return True if path is an ELF file whose build id matches, when one is given. """
        if not self.readelf.is_elf_file(path):
            return False
        if expected_build_id is not None:
            return self.readelf.get_build_id(path) == expected_build_id
        return True
465
466
class Addr2Nearestline(object):
    """ Use llvm-symbolizer to convert (dso_path, func_addr, addr) to (source_file, line).

        Some instructions generated by C++ compilers have no matching statement in source
        code (stack corruption checks, switch optimizations, etc.), so there is no line
        info for their exact address. We still want to assign such an instruction to the
        nearest line before it (just like objdump -dl). So instead of the exact line of an
        address, we look for the nearest one: if an address has no line info, try
        address - 1, then address - 2, address - 3, etc.

        The implementation steps are as below:
        1. Collect all (dso_path, func_addr, addr) requests before converting, to reduce
        the number of symbolizer invocations.
        2. Convert addrs to (source_file, line) pairs for each dso_path as below:
          2.1 Check if the dso_path has .debug_line. If not, omit its conversion.
          2.2 Get arch of the dso_path, and decide the addr_step for it. addr_step is the
          step used to move addr each time. For example, since instructions of arm64 are
          all 4 bytes long, addr_step for arm64 can be 4.
          2.3 Find line info for each addr in the dso_path.
          2.4 For each addr without line info, find line info for
              range(addr - addr_step, addr - addr_step * 4 - 1, -addr_step).
          2.5 For each addr without line info, find line info for
              range(addr - addr_step * 5, addr - addr_step * 128 - 1, -addr_step).
              (128 is a guess number. A nested switch statement in
               system/core/demangle/Demangler.cpp has >300 bytes without line info in arm64.)
    """
    class Dso(object):
        """ Info of a dynamic shared library.
            addrs: a map from address to Addr object in this dso.
        """

        def __init__(self, build_id: Optional[str]):
            self.build_id = build_id
            self.addrs: Dict[int, Addr2Nearestline.Addr] = {}
            # Keeping a file name per addr would take a lot of memory. Addr objects hold
            # small integer ids instead, and the tables below translate between ids and
            # names (an id is the index of the name in the *_id_to_name list).
            self.file_name_to_id: Dict[str, int] = {}
            self.file_id_to_name: List[str] = []
            self.func_name_to_id: Dict[str, int] = {}
            self.func_id_to_name: List[str] = []

        def get_file_id(self, file_path: str) -> int:
            """ Return the id of file_path, allocating a new id on first use. """
            if file_path not in self.file_name_to_id:
                self.file_name_to_id[file_path] = len(self.file_id_to_name)
                self.file_id_to_name.append(file_path)
            return self.file_name_to_id[file_path]

        def get_func_id(self, func_name: str) -> int:
            """ Return the id of func_name, allocating a new id on first use. """
            if func_name not in self.func_name_to_id:
                self.func_name_to_id[func_name] = len(self.func_id_to_name)
                self.func_id_to_name.append(func_name)
            return self.func_name_to_id[func_name]

    class Addr(object):
        """ Info of an addr request.
            func_addr: start_addr of the function containing addr.
            source_lines: a list of (file_id, line_number) or
                          (file_id, line_number, func_id) tuples for addr, or None if
                          not resolved. source_lines[:-1] are all for inlined functions.
        """

        def __init__(self, func_addr: int):
            self.func_addr = func_addr
            self.source_lines: Optional[List[Tuple[int, ...]]] = None

    def __init__(
            self, ndk_path: Optional[str],
            binary_finder: BinaryFinder, with_function_name: bool):
        self.symbolizer_path = ToolFinder.find_tool_path('llvm-symbolizer', ndk_path)
        if not self.symbolizer_path:
            log_exit("Can't find llvm-symbolizer. " + NDK_ERROR_MESSAGE)
        self.readelf = ReadElf(ndk_path)
        self.dso_map: Dict[str, Addr2Nearestline.Dso] = {}  # map from dso_path to Dso.
        self.binary_finder = binary_finder
        self.with_function_name = with_function_name

    def add_addr(self, dso_path: str, build_id: Optional[str], func_addr: int, addr: int):
        """ Register an addr of dso_path for conversion. Repeated addrs are ignored. """
        try:
            dso = self.dso_map[dso_path]
        except KeyError:
            dso = self.dso_map[dso_path] = self.Dso(build_id)
        if addr not in dso.addrs:
            dso.addrs[addr] = self.Addr(func_addr)

    def convert_addrs_to_lines(self, jobs: int):
        """ Resolve all registered addrs, handling each dso in a pool of `jobs` threads. """
        with ThreadPoolExecutor(jobs) as executor:
            futures: List[Future] = [
                executor.submit(self._convert_addrs_in_one_dso, dso_path, dso)
                for dso_path, dso in self.dso_map.items()
            ]
            for future in futures:
                # Call future.result() to report exceptions raised in the executor.
                future.result()

    def _convert_addrs_in_one_dso(self, dso_path: str, dso: Addr2Nearestline.Dso):
        """ Fill source_lines of the addrs in one dso, searching further back in 3 rounds. """
        real_path = self.binary_finder.find_binary(dso_path, dso.build_id)
        if not real_path:
            if dso_path not in ('//anon', 'unknown', '[kernel.kallsyms]'):
                logging.debug("Can't find dso %s" % dso_path)
            return

        if not self._check_debug_line_section(real_path):
            logging.debug("file %s doesn't contain .debug_line section." % real_path)
            return

        step = self._get_addr_step(real_path)
        shift_rounds = (
            [0],
            range(-step, -step * 4 - 1, -step),
            range(-step * 5, -step * 128 - 1, -step),
        )
        for addr_shifts in shift_rounds:
            self._collect_line_info(dso, real_path, addr_shifts)

    def _check_debug_line_section(self, real_path: Path) -> bool:
        """ Return True if readelf lists a .debug_line section for real_path. """
        sections = self.readelf.get_sections(real_path)
        return '.debug_line' in sections

    def _get_addr_step(self, real_path: Path) -> int:
        """ Return the step used to move addrs backwards: 4 for arm64, 2 for arm, else 1. """
        arch = self.readelf.get_arch(real_path)
        if arch == 'arm64':
            step = 4
        elif arch == 'arm':
            step = 2
        else:
            step = 1
        return step

    def _collect_line_info(
            self, dso: Addr2Nearestline.Dso, real_path: Path, addr_shifts: List[int]):
        """ Use llvm-symbolizer to get line info in a dso, with given addr shifts. """
        # 1. Collect addrs to send to the symbolizer.
        pending_addrs: Set[int] = set()
        for addr, addr_obj in dso.addrs.items():
            if addr_obj.source_lines:  # already has source line, no need to search.
                continue
            for shift in addr_shifts:
                # The addr after shift shouldn't change to another function.
                shifted_addr = max(addr + shift, addr_obj.func_addr)
                pending_addrs.add(shifted_addr)
                if shifted_addr == addr_obj.func_addr:
                    break
        if not pending_addrs:
            return
        addr_request = '\n'.join('0x%x' % addr for addr in sorted(pending_addrs))

        # 2. Run the symbolizer, feeding the addrs through stdin.
        try:
            completed = subprocess.run(self._build_symbolizer_args(real_path),
                                       input=str_to_bytes(addr_request),
                                       stdout=subprocess.PIPE)
            symbolizer_output = bytes_to_str(completed.stdout)
        except OSError:
            return
        addr_map = self.parse_line_output(symbolizer_output, dso)

        # 3. Fill line info in dso.addrs, taking the first shift that has line info.
        for addr, addr_obj in dso.addrs.items():
            if addr_obj.source_lines:
                continue
            for shift in addr_shifts:
                shifted_addr = max(addr + shift, addr_obj.func_addr)
                found_lines = addr_map.get(shifted_addr)
                if found_lines:
                    addr_obj.source_lines = found_lines
                    break
                if shifted_addr == addr_obj.func_addr:
                    break

    def _build_symbolizer_args(self, binary_path: Path) -> List[str]:
        """ Return the llvm-symbolizer command line for binary_path. """
        if self.with_function_name:
            function_args = ['--functions=linkage', '--demangle']
        else:
            function_args = ['--functions=none']
        return [self.symbolizer_path, '--print-address', '--inlining',
                '--obj=%s' % binary_path] + function_args

    def parse_line_output(self, output: str, dso: Addr2Nearestline.Dso) -> Dict[int,
                                                                                List[Tuple[int]]]:
        """
        Parse symbolizer output into a map from address to a list of
        (file_id, line_number) tuples, or (file_id, line_number, func_id) tuples when
        function names are requested. Addresses without valid line info are left out.

        The output is a list of lines.
            address1
            function_name1 (the function name can be empty)
            source_location1
            function_name2
            source_location2
            ...
            (end with empty line)
        """

        addr_map: Dict[int, List[Tuple[int]]] = {}
        rows = output.strip().splitlines()
        row_count = len(rows)
        pos = 0
        while pos < row_count:
            address = self._parse_line_output_address(rows[pos])
            pos += 1
            if address is None:
                continue
            frames = []
            while pos < row_count:
                if self.with_function_name:
                    # A frame takes two rows: function name, then source location.
                    if pos + 1 == row_count:
                        break
                    function_name = rows[pos].strip()
                    if not function_name and (':' not in rows[pos + 1]):
                        # no more frames
                        break
                    pos += 1
                elif not rows[pos]:
                    # An empty row ends the frames of this address.
                    pos += 1
                    break

                file_path, line_number = self._parse_line_output_source_location(rows[pos])
                pos += 1
                if not file_path or not line_number:
                    # An addr can have a list of (file, line), when the addr belongs to an inlined
                    # function. Sometimes only part of the list has ? mark. In this case, we think
                    # the line info is valid if the first line doesn't have ? mark.
                    if not frames:
                        break
                    continue
                file_id = dso.get_file_id(file_path)
                if self.with_function_name:
                    frames.append((file_id, line_number, dso.get_func_id(function_name)))
                else:
                    frames.append((file_id, line_number))
            if frames:
                addr_map[address] = frames
        return addr_map

    def _parse_line_output_address(self, output: str) -> Optional[int]:
        """ Return the value of a '0x...' address row, or None for any other row. """
        return int(output, 16) if output.startswith('0x') else None

    def _parse_line_output_source_location(self, line: str) -> Tuple[Optional[str], Optional[int]]:
        """ Return (file_path, line_number) of a source location row, or (None, None) if
            the row isn't a valid location.
        """
        # Handle lines in format filename:line:column, like "runtest/two_functions.cpp:14:25".
        # Filename may contain ':' like "C:\Users\...\file".
        fields = line.rsplit(':', 2)
        if len(fields) != 3:
            return None, None
        file_path, line_text = fields[0], fields[1]
        if not file_path or ('?' in file_path) or not line_text or ('?' in line_text):
            return None, None
        try:
            return file_path, int(line_text)
        except ValueError:
            return None, None

    def get_dso(self, dso_path: str) -> Addr2Nearestline.Dso:
        """ Return the Dso registered for dso_path (None if there isn't one). """
        return self.dso_map.get(dso_path)

    def get_addr_source(self, dso: Addr2Nearestline.Dso, addr: int) -> Optional[List[Tuple[int]]]:
        """ Return the source info of addr with ids replaced by names, or None if the
            addr has no line info. Items are (file, line), or (file, line, function) when
            function names are requested.
        """
        source = dso.addrs[addr].source_lines
        if source is None:
            return None
        file_names = dso.file_id_to_name
        if self.with_function_name:
            func_names = dso.func_id_to_name
            return [(file_names[file_id], line, func_names[func_id])
                    for (file_id, line, func_id) in source]
        return [(file_names[file_id], line) for (file_id, line) in source]
725
726
class SourceFileSearcher(object):
    """ Find source file paths in the file system.
        The file paths reported by addr2line are the paths stored in debug sections
        of shared libraries, so they need to be converted to file paths in the file
        system. The conversion works as below:
        1. Walk the provided source_dirs and remember every file whose extension is
           in SOURCE_FILE_EXTS (C/C++ headers and sources, Java and Kotlin sources).
        2. Given an abstract_path reported by addr2line, look at all real paths
           having the same file name, and select the one whose directory has the
           longest common suffix with the directory of the abstract path.
    """

    SOURCE_FILE_EXTS = {'.h', '.hh', '.H', '.hxx', '.hpp', '.h++',
                        '.c', '.cc', '.C', '.cxx', '.cpp', '.c++',
                        '.java', '.kt'}

    @classmethod
    def is_source_filename(cls, filename: str) -> bool:
        """ Return True if the extension of filename is in SOURCE_FILE_EXTS. """
        _, extension = os.path.splitext(filename)
        return extension in cls.SOURCE_FILE_EXTS

    def __init__(self, source_dirs: List[str]):
        # Map from filename to a list of reversed directory path containing filename.
        # Directory paths are stored reversed, so the common suffix of two directories
        # can be computed as the common prefix of their reversed strings.
        self.filename_to_rparents: Dict[str, List[str]] = {}
        self._collect_paths(source_dirs)

    def _collect_paths(self, source_dirs: List[str]):
        """ Record the reversed parent directory of each source file under source_dirs. """
        for source_dir in source_dirs:
            for parent, _, file_names in os.walk(source_dir):
                source_names = [name for name in file_names if self.is_source_filename(name)]
                if not source_names:
                    continue
                # One reversed string is shared by all source files in this directory.
                rparent = parent[::-1]
                for name in source_names:
                    self.filename_to_rparents.setdefault(name, []).append(rparent)

    def get_real_path(self, abstract_path: str) -> Optional[str]:
        """ Return the collected path best matching abstract_path, or None if no
            collected file has the same file name.
        """
        abstract_parent, file_name = os.path.split(abstract_path.replace('/', os.sep))
        candidates = self.filename_to_rparents.get(file_name)
        if not candidates:
            return None
        abstract_rparent = abstract_parent[::-1]

        def common_length(real_rparent: str) -> int:
            return len(os.path.commonprefix((real_rparent, abstract_rparent)))

        # max() keeps the first candidate among those with the same common length.
        best_rparent = max(candidates, key=common_length)
        return os.path.join(best_rparent[::-1], file_name)
788
789
class Objdump(object):
    """ A wrapper of objdump to disassemble code. """

    def __init__(self, ndk_path: Optional[str], binary_finder: BinaryFinder):
        self.ndk_path = ndk_path
        self.binary_finder = binary_finder
        self.readelf = ReadElf(ndk_path)
        # Cache of objdump tool paths, keyed by arch.
        self.objdump_paths: Dict[str, str] = {}

    def get_dso_info(self, dso_path: str, expected_build_id: Optional[str]
                     ) -> Optional[Tuple[str, str]]:
        """ Locate the binary of dso_path and detect its arch.
            Return (real_path, arch), which can be passed to disassemble_code().
            Return None if the binary can't be found or its arch is unknown.
        """
        real_path = self.binary_finder.find_binary(dso_path, expected_build_id)
        if not real_path:
            return None
        arch = self.readelf.get_arch(real_path)
        if arch == 'unknown':
            return None
        return (str(real_path), arch)

    def disassemble_code(self, dso_info, start_addr, addr_len
                         ) -> Optional[List[Tuple[str, int]]]:
        """ Disassemble addr_len bytes starting at start_addr of a binary.
            dso_info is a pair (real_path, arch) as returned by get_dso_info().
            Return a list of pair (disassemble_code_line, addr). addr is 0 for lines
            not starting with a hex address. Return None if objdump can't be run or
            prints nothing. Exit the script if llvm-objdump can't be found.
        """
        real_path, arch = dso_info

        # Find the objdump tool for this arch, reusing the cached path if there is one.
        objdump_path = self.objdump_paths.get(arch)
        if not objdump_path:
            objdump_path = ToolFinder.find_tool_path('llvm-objdump', self.ndk_path, arch)
            if not objdump_path:
                log_exit("Can't find llvm-objdump. " + NDK_ERROR_MESSAGE)
            self.objdump_paths[arch] = objdump_path

        # Run objdump.
        args = [objdump_path, '-dlC', '--no-show-raw-insn',
                '--start-address=0x%x' % start_addr,
                '--stop-address=0x%x' % (start_addr + addr_len),
                real_path]
        if arch == 'arm' and 'llvm-objdump' in objdump_path:
            args += ['--print-imm-hex']
        try:
            subproc = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
            (stdoutdata, _) = subproc.communicate()
            stdoutdata = bytes_to_str(stdoutdata)
        except OSError:
            return None

        if not stdoutdata:
            return None

        # Pair each output line with the address in front of the first ':'.
        result = []
        for line in stdoutdata.split('\n'):
            line = line.rstrip()  # Remove '\r' on Windows.
            items = line.split(':', 1)
            try:
                addr = int(items[0], 16)
            except ValueError:
                addr = 0
            result.append((line, addr))
        return result
847
848
class ReadElf(object):
    """ A wrapper of llvm-readelf, used to query properties of elf files. """

    def __init__(self, ndk_path: Optional[str]):
        self.readelf_path = ToolFinder.find_tool_path('llvm-readelf', ndk_path)
        if self.readelf_path:
            return
        log_exit("Can't find llvm-readelf. " + NDK_ERROR_MESSAGE)

    @staticmethod
    def is_elf_file(path: Union[Path, str]) -> bool:
        """ Return True if path is a regular file starting with the 4-byte ELF magic. """
        if not os.path.isfile(path):
            return False
        with open(path, 'rb') as fh:
            magic = fh.read(4)
        return magic == b'\x7fELF'

    def get_arch(self, elf_file_path: Union[Path, str]) -> str:
        """ Get arch of an elf file: 'arm64', 'arm', 'x86_64', 'x86' or 'unknown'. """
        if not self.is_elf_file(elf_file_path):
            return 'unknown'
        try:
            header = bytes_to_str(
                subprocess.check_output([self.readelf_path, '-h', str(elf_file_path)]))
        except subprocess.CalledProcessError:
            return 'unknown'
        # The order matters: the first marker found in the header decides the arch.
        arch_markers = (('AArch64', 'arm64'), ('ARM', 'arm'),
                        ('X86-64', 'x86_64'), ('80386', 'x86'))
        for marker, arch in arch_markers:
            if marker in header:
                return arch
        return 'unknown'

    def get_build_id(self, elf_file_path: Union[Path, str], with_padding=True) -> str:
        """ Get build id of an elf file.
            Return "" if the file isn't an elf file, readelf fails, or no build id is
            reported. With with_padding, the build id is returned in pad_build_id() format.
        """
        if not self.is_elf_file(elf_file_path):
            return ""
        try:
            notes = bytes_to_str(
                subprocess.check_output([self.readelf_path, '-n', str(elf_file_path)]))
        except subprocess.CalledProcessError:
            return ""
        matched = re.search(r'Build ID:\s*(\S+)', notes)
        if not matched:
            return ""
        build_id = matched.group(1)
        return self.pad_build_id(build_id) if with_padding else build_id

    @staticmethod
    def pad_build_id(build_id: str) -> str:
        """ Pad build id to 40 hex numbers (20 bytes), and prefix it with '0x'.
            Longer build ids are cut to 40 hex numbers.
        """
        return '0x' + build_id[:40].ljust(40, '0')

    @staticmethod
    def unpad_build_id(build_id: str) -> str:
        """ Remove the '0x' prefix and trailing zero padding of a build id.
            A build id without the '0x' prefix is returned as it is.
        """
        if not build_id.startswith('0x'):
            return build_id
        trimmed = build_id[2:]
        # Unpad build id as TrimZeroesFromBuildIDString() in quipper: zeroes are
        # removed from the end in groups of 8.
        zero_group = '0' * 8
        while trimmed.endswith(zero_group):
            trimmed = trimmed[:-len(zero_group)]
        return trimmed

    def get_sections(self, elf_file_path: Union[Path, str]) -> List[str]:
        """ Get sections of an elf file.
            Return an empty list if the file isn't an elf file or readelf fails.
        """
        section_names: List[str] = []
        if not self.is_elf_file(elf_file_path):
            return section_names
        try:
            raw_output = subprocess.check_output([self.readelf_path, '-SW', str(elf_file_path)])
        except subprocess.CalledProcessError:
            return section_names
        for line in bytes_to_str(raw_output).split('\n'):
            # Parse line like:" [ 1] .note.android.ident NOTE  0000000000400190 ...".
            matched = re.search(r'^\s+\[\s*\d+\]\s(.+?)\s', line)
            if not matched:
                continue
            name = matched.group(1).strip()
            if name:
                section_names.append(name)
        return section_names
934
935
def extant_dir(arg: str) -> str:
    """ArgumentParser type that only accepts existing directories.

    Args:
        arg: The string argument given on the command line.
    Returns: The real path of the directory (symlinks resolved).
    Raises:
        argparse.ArgumentTypeError: The given path isn't a directory.
    """
    real_dir = os.path.realpath(arg)
    if os.path.isdir(real_dir):
        return real_dir
    raise argparse.ArgumentTypeError(f'{real_dir} is not a directory.')
949
950
def extant_file(arg: str) -> str:
    """ArgumentParser type that only accepts existing files.

    Args:
        arg: The string argument given on the command line.
    Returns: The real path of the file (symlinks resolved).
    Raises:
        argparse.ArgumentTypeError: The given path isn't a file.
    """
    real_file = os.path.realpath(arg)
    if os.path.isfile(real_file):
        return real_file
    raise argparse.ArgumentTypeError(f'{real_file} is not a file.')
964
965
def log_fatal(msg: str):
    """ Report a fatal error by raising an Exception carrying msg. """
    error = Exception(msg)
    raise error
968
969
def log_exit(msg: str):
    """ Exit the script with msg as the exit message, by raising SystemExit. """
    raise SystemExit(msg)
972
973
class LogFormatter(logging.Formatter):
    """ Format log records as "time [level] (file:line) message". """

    def __init__(self):
        log_format = '%(asctime)s [%(levelname)s] (%(filename)s:%(lineno)d) %(message)s'
        super().__init__(log_format)

    def formatTime(self, record, datefmt):
        # datefmt is ignored: time is always shown as hour:minute:second,milliseconds.
        clock = super().formatTime(record, '%H:%M:%S')
        millis = '%03d' % record.msecs
        return clock + ',' + millis
982
983
class Log:
    """ Holder of the one-time logging setup of a script. """

    # Set by init(), which must be called at most once.
    initialized = False

    @classmethod
    def init(cls, log_level: str = 'info'):
        """ Set the level of the root logger to log_level, and add a stream handler
            formatting records with LogFormatter.
        """
        assert not cls.initialized
        cls.initialized = True
        root_logger = cls.logger = logging.root
        root_logger.setLevel(log_level.upper())
        stream_handler = logging.StreamHandler()
        stream_handler.setFormatter(LogFormatter())
        root_logger.addHandler(stream_handler)
996
997
class ArgParseFormatter(
        argparse.ArgumentDefaultsHelpFormatter, argparse.RawDescriptionHelpFormatter):
    """ Help formatter showing argument defaults and keeping the description as written. """
1001
1002
@dataclass
class ReportLibOptions:
    """ Report lib options collected by BaseArgumentParser.parse_known_args(). """
    # Value of --show-art-frames.
    show_art_frames: bool
    # Value of --trace-offcpu. None when the option isn't given.
    trace_offcpu: Optional[str]
    # Values of --proguard-mapping-file. None when the option isn't given.
    proguard_mapping_files: Optional[List[str]]
    # Sample filter arguments built from the sample filter options.
    sample_filters: List[str]
    # Values of --aggregate-threads. None when the option isn't given.
    aggregate_threads: Optional[List[str]]
1010
1011
class BaseArgumentParser(argparse.ArgumentParser):
    """ ArgumentParser shared by scripts.
        It uses ArgParseFormatter for help messages, always accepts --log, and can add
        report lib options and sample filter options. After parsing, logging is
        initialized (if not done yet), and when report lib options were added, a
        ReportLibOptions is stored in the namespace as report_lib_options.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs, formatter_class=ArgParseFormatter)
        self.has_sample_filter_options = False
        self.sample_filter_with_pid_shortcut = False
        self.has_report_lib_options = False
        # Set once --log is registered, so parsing more than once doesn't register it again.
        self._log_option_added = False

    def add_report_lib_options(self, group: Optional[Any] = None,
                               default_show_art_frames: bool = False,
                               sample_filter_group: Optional[Any] = None,
                               sample_filter_with_pid_shortcut: bool = True):
        """ Add report lib options, including sample filter options.
            group: where to add the report lib options. Use the parser itself if not set.
            default_show_art_frames: default value of --show-art-frames.
            sample_filter_group: where to add the sample filter options. Create a new
                                 argument group if not set.
            sample_filter_with_pid_shortcut: whether to also add --pid and --tid.
        """
        self.has_report_lib_options = True
        parser = group if group else self
        parser.add_argument(
            '--proguard-mapping-file', nargs='+',
            help='Add proguard mapping file to de-obfuscate symbols')
        parser.add_argument('--show-art-frames', '--show_art_frames',
                            action=argparse.BooleanOptionalAction, default=default_show_art_frames,
                            help='Show frames of internal methods in the ART Java interpreter.')
        parser.add_argument(
            '--trace-offcpu', choices=['on-cpu', 'off-cpu', 'on-off-cpu', 'mixed-on-off-cpu'],
            help="""Set report mode for profiles recorded with --trace-offcpu option. All possible
                    modes are: on-cpu (only on-cpu samples), off-cpu (only off-cpu samples),
                    on-off-cpu (both on-cpu and off-cpu samples, can be split by event name),
                    mixed-on-off-cpu (on-cpu and off-cpu samples using the same event name).
                    If not set, mixed-on-off-cpu mode is used.
                """)
        self._add_sample_filter_options(sample_filter_group, sample_filter_with_pid_shortcut)
        parser.add_argument(
            '--aggregate-threads', nargs='+', metavar='thread_name_regex',
            help="""Aggregate threads with names matching the same regex. As a result, samples from
                    different threads (like a thread pool) can be shown in one flamegraph.
                """)

    def _add_sample_filter_options(
            self, group: Optional[Any] = None, with_pid_shortcut: bool = True):
        """ Add options to exclude/include samples by pid, tid, process name, thread
            name and filter file. --pid and --tid are added only with with_pid_shortcut.
        """
        if not group:
            group = self.add_argument_group('Sample filter options')
        group.add_argument('--exclude-pid', metavar='pid', nargs='+', type=int,
                           help='exclude samples for selected processes')
        group.add_argument('--exclude-tid', metavar='tid', nargs='+', type=int,
                           help='exclude samples for selected threads')
        group.add_argument(
            '--exclude-process-name', metavar='process_name_regex', nargs='+',
            help='exclude samples for processes with name containing the regular expression')
        group.add_argument(
            '--exclude-thread-name', metavar='thread_name_regex', nargs='+',
            help='exclude samples for threads with name containing the regular expression')

        if with_pid_shortcut:
            group.add_argument('--pid', metavar='pid', nargs='+', type=int,
                               help='only include samples for selected processes')
            group.add_argument('--tid', metavar='tid', nargs='+', type=int,
                               help='only include samples for selected threads')
        group.add_argument('--include-pid', metavar='pid', nargs='+', type=int,
                           help='only include samples for selected processes')
        group.add_argument('--include-tid', metavar='tid', nargs='+', type=int,
                           help='only include samples for selected threads')
        group.add_argument(
            '--include-process-name', metavar='process_name_regex', nargs='+',
            help='only include samples for processes with name containing the regular expression')
        group.add_argument(
            '--comm', '--include-thread-name', metavar='thread_name_regex',
            dest='include_thread_name', nargs='+',
            help='only include samples for threads with name containing the regular expression')
        group.add_argument(
            '--filter-file', metavar='file',
            help='use filter file to filter samples based on timestamps. ' +
            'The file format is in doc/sampler_filter.md.')
        self.has_sample_filter_options = True
        self.sample_filter_with_pid_shortcut = with_pid_shortcut

    def _build_sample_filter(self, args: argparse.Namespace) -> List[str]:
        """ Build sample filters, which can be passed to ReportLib.SetSampleFilter().
            Id lists are joined by ',' into one filter argument. Each name regex gets
            its own filter argument. --pid and --tid are mapped to --include-pid and
            --include-tid.
        """
        filters: List[str] = []

        def add_id_filter(option: str, ids: Optional[List[int]]):
            if ids:
                filters.extend([option, ','.join(str(value) for value in ids)])

        def add_name_filters(option: str, names: Optional[List[str]]):
            for name in names or []:
                filters.extend([option, name])

        add_id_filter('--exclude-pid', args.exclude_pid)
        add_id_filter('--exclude-tid', args.exclude_tid)
        add_name_filters('--exclude-process-name', args.exclude_process_name)
        add_name_filters('--exclude-thread-name', args.exclude_thread_name)

        add_id_filter('--include-pid', args.include_pid)
        add_id_filter('--include-tid', args.include_tid)
        if self.sample_filter_with_pid_shortcut:
            add_id_filter('--include-pid', args.pid)
            add_id_filter('--include-tid', args.tid)
        add_name_filters('--include-process-name', args.include_process_name)
        add_name_filters('--include-thread-name', args.include_thread_name)
        if args.filter_file:
            filters.extend(['--filter-file', args.filter_file])
        return filters

    def _add_log_option(self):
        """ Register --log the first time it is needed. Later calls do nothing. """
        if self._log_option_added:
            return
        self.add_argument(
            '--log', choices=['debug', 'info', 'warning'],
            default='info', help='set log level')
        self._log_option_added = True

    def parse_known_args(self, *args, **kwargs):
        """ Parse arguments as argparse.ArgumentParser.parse_known_args() does, with
            --log added to the accepted options. Also build report_lib_options when
            report lib options were added, and initialize logging if needed.
        """
        # --log is registered here instead of __init__, after the options of the script.
        # Registering it on each call would raise a conflict error on the second parse.
        self._add_log_option()
        namespace, left_args = super().parse_known_args(*args, **kwargs)

        if self.has_report_lib_options:
            sample_filters = self._build_sample_filter(namespace)
            report_lib_options = ReportLibOptions(
                namespace.show_art_frames, namespace.trace_offcpu, namespace.proguard_mapping_file,
                sample_filters, namespace.aggregate_threads)
            setattr(namespace, 'report_lib_options', report_lib_options)

        if not Log.initialized:
            Log.init(namespace.log)
        return namespace, left_args
1133