• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2#
3# Copyright (C) 2016 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17
18"""simpleperf_report_lib.py: a python wrapper of libsimpleperf_report.so.
19   Used to access samples in perf.data.
20
21"""
22
23import collections
24import ctypes as ct
25from pathlib import Path
26import struct
27from typing import Any, Dict, List, Optional, Union
28
29from simpleperf_utils import (bytes_to_str, get_host_binary_path, is_windows, str_to_bytes,
30                              ReportLibOptions)
31
32
def _is_null(p: Optional[ct._Pointer]) -> bool:
    """Tell whether `p` is a NULL pointer.

    Returns False for any truthy `p`. For a falsy `p`, the value is cast to
    `c_void_p` and the result is True only when that cast holds no address.
    """
    if not p:
        address = ct.cast(p, ct.c_void_p).value
        return address is None
    return False
37
38
def _char_pt(s: str) -> bytes:
    """Convert the string `s` to bytes by delegating to str_to_bytes()."""
    encoded = str_to_bytes(s)
    return encoded
41
42
def _char_pt_to_str(char_pt: ct.c_char_p) -> str:
    """Convert `char_pt` to a Python string by delegating to bytes_to_str()."""
    decoded = bytes_to_str(char_pt)
    return decoded
45
46
def _check(cond: bool, failmsg: str):
    """Raise RuntimeError(failmsg) unless `cond` is truthy."""
    if cond:
        return
    raise RuntimeError(failmsg)
50
51
class SampleStruct(ct.Structure):
    """ One sample read from perf.data.
        ip: program counter of the thread that generated the sample.
        pid: process id (thread group id) of that thread.
        tid: thread id.
        thread_comm: thread name.
        time: when the sample was generated, in nanoseconds. The clock is the one
              selected by the --clockid option of `simpleperf record`.
        in_kernel: whether the instruction is in kernel space or user space.
        cpu: the cpu that generated the sample.
        period: how many events have happened since the previous sample. With
             -e cpu-cycles this is a number of cpu cycles; with -e cpu-clock it
             is a number of nanoseconds.
    """
    _fields_ = [
        ('ip', ct.c_uint64),
        ('pid', ct.c_uint32),
        ('tid', ct.c_uint32),
        ('_thread_comm', ct.c_char_p),
        ('time', ct.c_uint64),
        ('_in_kernel', ct.c_uint32),
        ('cpu', ct.c_uint32),
        ('period', ct.c_uint64),
    ]

    @property
    def thread_comm(self) -> str:
        """Thread name converted from the raw `_thread_comm` field."""
        raw_comm = self._thread_comm
        return _char_pt_to_str(raw_comm)

    @property
    def in_kernel(self) -> bool:
        """True when the raw `_in_kernel` field is non-zero."""
        return self._in_kernel != 0
82
83
class TracingFieldFormatStruct(ct.Structure):
    """Format of a tracing field.
       name: name of the field.
       offset: offset of the field in tracing data.
       elem_size: size of the element type.
       elem_count: the number of elements in this field, more than one if the field is an array.
       is_signed: whether the element type is signed or unsigned.
       is_dynamic: whether the element is a dynamic string.
    """
    _fields_ = [('_name', ct.c_char_p),
                ('offset', ct.c_uint32),
                ('elem_size', ct.c_uint32),
                ('elem_count', ct.c_uint32),
                ('is_signed', ct.c_uint32),
                ('is_dynamic', ct.c_uint32)]

    # Maps an element size in bytes to the struct format code of the signed integer
    # of that size; the unsigned code is the same letter in upper case.
    _unpack_key_dict = {1: 'b', 2: 'h', 4: 'i', 8: 'q'}

    @property
    def name(self) -> str:
        """Field name converted from the raw `_name` field."""
        return _char_pt_to_str(self._name)

    def parse_value(self, data: ct.c_char_p) -> Union[int, str, bytes, tuple, List[bytes]]:
        """ Parse value of a field in a tracepoint event.

            data: the tracing data of the event. It is only ever sliced, and every
                  slice must be a bytes object.

            The return value depends on the type of the field:
              - dynamic string, or array of one-byte elements: a str, cut at the first NUL.
              - elements of size 1, 2, 4 or 8: an int, or a tuple of ints when
                elem_count is not 1.
              - any other element size: the raw bytes of the element, or a list of
                such byte strings when elem_count is not 1.
        """
        if self.is_dynamic:
            # The field itself holds two little-endian 16-bit values: where the
            # string starts in `data` and the maximum number of bytes it occupies.
            offset, max_len = struct.unpack('<HH', data[self.offset:self.offset + 4])
            raw = data[offset:offset + max_len]
            # Locate the terminator on the raw bytes and decode once, instead of
            # decoding byte by byte (which splits multi-byte characters).
            return bytes_to_str(raw.partition(b'\x00')[0])

        if self.elem_count > 1 and self.elem_size == 1:
            # Probably the field is a string.
            # Don't use self.is_signed, which has different values on x86 and arm.
            raw = data[self.offset:self.offset + self.elem_count]
            return bytes_to_str(raw.partition(b'\x00')[0])

        unpack_key = self._unpack_key_dict.get(self.elem_size)
        if unpack_key:
            if not self.is_signed:
                unpack_key = unpack_key.upper()
            end = self.offset + self.elem_count * self.elem_size
            value = struct.unpack(f'{self.elem_count}{unpack_key}', data[self.offset:end])
        else:
            # Since we don't know the element type, just return the bytes.
            value = [data[start:start + self.elem_size]
                     for start in range(self.offset,
                                        self.offset + self.elem_count * self.elem_size,
                                        self.elem_size)]
        if self.elem_count == 1:
            value = value[0]
        return value
142
143
class TracingDataFormatStruct(ct.Structure):
    """Layout of the tracing data of a tracepoint event, see
       https://www.kernel.org/doc/html/latest/trace/events.html#event-formats.
       size: total size of all the fields in the tracing data.
       field_count: how many fields there are.
       fields: an array of field formats.
    """
    _fields_ = [
        ('size', ct.c_uint32),
        ('field_count', ct.c_uint32),
        ('fields', ct.POINTER(TracingFieldFormatStruct)),
    ]
154
155
class EventStruct(ct.Structure):
    """The event type of a sample.
       name: the event type's name.
       tracing_data_format: format of the tracing data; only available for a
                            tracepoint event.
    """
    _fields_ = [
        ('_name', ct.c_char_p),
        ('tracing_data_format', TracingDataFormatStruct),
    ]

    @property
    def name(self) -> str:
        """Event name converted from the raw `_name` field."""
        raw_name = self._name
        return _char_pt_to_str(raw_name)
167
168
class MappingStruct(ct.Structure):
    """ One mapped area of a monitored thread, like a line of /proc/<pid>/maps.
        start: address in memory where the area starts.
        end: address in memory where the area ends.
        pgoff: offset in the mapped shared library.
    """
    _fields_ = [
        ('start', ct.c_uint64),
        ('end', ct.c_uint64),
        ('pgoff', ct.c_uint64),
    ]
178
179
class SymbolStruct(ct.Structure):
    """ Symbol info for the instruction hit by a sample, or by one entry of a
        sample's callchain.
        dso_name: path of the shared library that contains the instruction.
        vaddr_in_file: virtual address of the instruction in the shared library.
        symbol_name: name of the function that contains the instruction.
        symbol_addr: start address of that function.
        symbol_len: length of that function in the shared library.
        mapping: the mapping area hit by the instruction.
    """
    _fields_ = [
        ('_dso_name', ct.c_char_p),
        ('vaddr_in_file', ct.c_uint64),
        ('_symbol_name', ct.c_char_p),
        ('symbol_addr', ct.c_uint64),
        ('symbol_len', ct.c_uint64),
        ('mapping', ct.POINTER(MappingStruct)),
    ]

    @property
    def dso_name(self) -> str:
        """Library path converted from the raw `_dso_name` field."""
        raw_dso = self._dso_name
        return _char_pt_to_str(raw_dso)

    @property
    def symbol_name(self) -> str:
        """Function name converted from the raw `_symbol_name` field."""
        raw_symbol = self._symbol_name
        return _char_pt_to_str(raw_symbol)
203
204
class CallChainEntryStructure(ct.Structure):
    """ One entry of a sample's callchain.
        ip: address of the instruction of this entry.
        symbol: symbol info of this entry.
    """
    _fields_ = [
        ('ip', ct.c_uint64),
        ('symbol', SymbolStruct),
    ]
212
213
class CallChainStructure(ct.Structure):
    """ The callchain of a sample.
        nr: how many entries the callchain has.
        entries: pointer to an array of CallChainEntryStructure.

        Example: a sample is taken while a thread runs function C, reached through
        function A -> function B -> function C.
        Then nr = 2, and entries = [function B, function A].
    """
    _fields_ = [
        ('nr', ct.c_uint32),
        ('entries', ct.POINTER(CallChainEntryStructure)),
    ]
225
226
class FeatureSectionStructure(ct.Structure):
    """ A feature section of perf.data, which stores information such as the
        record cmd, the device arch, etc.
        data: pointer to the buffer that holds the section data.
        data_size: size of the data in bytes.
    """
    _fields_ = [
        ('data', ct.POINTER(ct.c_char)),
        ('data_size', ct.c_uint32),
    ]
234
235
class ReportLibStructure(ct.Structure):
    """Structure with no fields, used only as the target type of the pointer
       returned by CreateReportLib.
    """
    _fields_ = []
238
239
240# pylint: disable=invalid-name
class ReportLib(object):
    """Python front end of the native report library.

    Creating an instance loads the native library and creates a native report
    object through CreateReportLib; every public method forwards to a native
    function with that object as first argument. Close() destroys the native
    object, after which methods that need it raise from getInstance().
    """

    def __init__(self, native_lib_path: Optional[str] = None):
        """Load the native library and create the native report object.

        native_lib_path: path of the native library. When None, the path returned
                         by _get_native_lib() is used.
        """
        if native_lib_path is None:
            native_lib_path = self._get_native_lib()

        self._load_dependent_lib()
        self._lib = ct.CDLL(native_lib_path)
        self._CreateReportLibFunc = self._lib.CreateReportLib
        self._CreateReportLibFunc.restype = ct.POINTER(ReportLibStructure)
        self._DestroyReportLibFunc = self._lib.DestroyReportLib
        # The results of the next four functions are consumed as booleans by the
        # wrappers below. Without an explicit restype ctypes reads the result as a
        # C int, so declare them like the other status-returning functions.
        # NOTE(review): assumes the native functions return C `bool` - confirm
        # against the native library's header.
        self._SetLogSeverityFunc = self._lib.SetLogSeverity
        self._SetLogSeverityFunc.restype = ct.c_bool
        self._SetSymfsFunc = self._lib.SetSymfs
        self._SetSymfsFunc.restype = ct.c_bool
        self._SetRecordFileFunc = self._lib.SetRecordFile
        self._SetRecordFileFunc.restype = ct.c_bool
        self._SetKallsymsFileFunc = self._lib.SetKallsymsFile
        self._SetKallsymsFileFunc.restype = ct.c_bool
        self._ShowIpForUnknownSymbolFunc = self._lib.ShowIpForUnknownSymbol
        self._ShowArtFramesFunc = self._lib.ShowArtFrames
        self._MergeJavaMethodsFunc = self._lib.MergeJavaMethods
        self._AddProguardMappingFileFunc = self._lib.AddProguardMappingFile
        self._AddProguardMappingFileFunc.restype = ct.c_bool
        self._GetSupportedTraceOffCpuModesFunc = self._lib.GetSupportedTraceOffCpuModes
        self._GetSupportedTraceOffCpuModesFunc.restype = ct.c_char_p
        self._SetTraceOffCpuModeFunc = self._lib.SetTraceOffCpuMode
        self._SetTraceOffCpuModeFunc.restype = ct.c_bool
        self._SetSampleFilterFunc = self._lib.SetSampleFilter
        self._SetSampleFilterFunc.restype = ct.c_bool
        self._GetNextSampleFunc = self._lib.GetNextSample
        self._GetNextSampleFunc.restype = ct.POINTER(SampleStruct)
        self._GetEventOfCurrentSampleFunc = self._lib.GetEventOfCurrentSample
        self._GetEventOfCurrentSampleFunc.restype = ct.POINTER(EventStruct)
        self._GetSymbolOfCurrentSampleFunc = self._lib.GetSymbolOfCurrentSample
        self._GetSymbolOfCurrentSampleFunc.restype = ct.POINTER(SymbolStruct)
        self._GetCallChainOfCurrentSampleFunc = self._lib.GetCallChainOfCurrentSample
        self._GetCallChainOfCurrentSampleFunc.restype = ct.POINTER(CallChainStructure)
        self._GetTracingDataOfCurrentSampleFunc = self._lib.GetTracingDataOfCurrentSample
        self._GetTracingDataOfCurrentSampleFunc.restype = ct.POINTER(ct.c_char)
        self._GetBuildIdForPathFunc = self._lib.GetBuildIdForPath
        self._GetBuildIdForPathFunc.restype = ct.c_char_p
        self._GetFeatureSection = self._lib.GetFeatureSection
        self._GetFeatureSection.restype = ct.POINTER(FeatureSectionStructure)
        self._instance = self._CreateReportLibFunc()
        assert not _is_null(self._instance)

        # Caches filled lazily by MetaInfo(), GetNextSample() and GetRecordCmd().
        self.meta_info: Optional[Dict[str, str]] = None
        self.current_sample: Optional[SampleStruct] = None
        self.record_cmd: Optional[str] = None

    def _get_native_lib(self) -> str:
        """Return the host path of libsimpleperf_report.so."""
        return get_host_binary_path('libsimpleperf_report.so')

    def _load_dependent_lib(self):
        """Load the libraries the native library depends on (Windows only)."""
        # As the windows dll is built with mingw we need to load 'libwinpthread-1.dll'.
        if is_windows():
            self._libwinpthread = ct.CDLL(get_host_binary_path('libwinpthread-1.dll'))

    def Close(self):
        """Destroy the native report object. Calling it again is a no-op."""
        if self._instance:
            self._DestroyReportLibFunc(self._instance)
            self._instance = None

    def SetReportOptions(self, options: ReportLibOptions):
        """ Set report options in one call. """
        if options.proguard_mapping_files:
            for file_path in options.proguard_mapping_files:
                self.AddProguardMappingFile(file_path)
        if options.show_art_frames:
            self.ShowArtFrames(True)
        if options.trace_offcpu:
            self.SetTraceOffCpuMode(options.trace_offcpu)
        if options.sample_filters:
            self.SetSampleFilter(options.sample_filters)

    def SetLogSeverity(self, log_level: str = 'info'):
        """ Set log severity of native lib, can be verbose,debug,info,error,fatal.
            Raises RuntimeError when the native call reports failure.
        """
        cond: bool = self._SetLogSeverityFunc(self.getInstance(), _char_pt(log_level))
        _check(cond, 'Failed to set log level')

    def SetSymfs(self, symfs_dir: str):
        """ Set directory used to find symbols.
            Raises RuntimeError when the native call reports failure.
        """
        cond: bool = self._SetSymfsFunc(self.getInstance(), _char_pt(symfs_dir))
        _check(cond, 'Failed to set symbols directory')

    def SetRecordFile(self, record_file: str):
        """ Set the path of record file, like perf.data.
            Raises RuntimeError when the native call reports failure.
        """
        cond: bool = self._SetRecordFileFunc(self.getInstance(), _char_pt(record_file))
        _check(cond, 'Failed to set record file')

    def ShowIpForUnknownSymbol(self):
        """ Forward to the native ShowIpForUnknownSymbol function. """
        self._ShowIpForUnknownSymbolFunc(self.getInstance())

    def ShowArtFrames(self, show: bool = True):
        """ Show frames of internal methods of the Java interpreter. """
        self._ShowArtFramesFunc(self.getInstance(), show)

    def MergeJavaMethods(self, merge: bool = True):
        """ This option merges jitted java methods with the same name but in different jit
            symfiles. If possible, it also merges jitted methods with interpreted methods,
            by mapping jitted methods to their corresponding dex files.
            Side effects:
              It only works at method level, not instruction level.
              It makes symbol.vaddr_in_file and symbol.mapping not accurate for jitted methods.
            Java methods are merged by default.
        """
        self._MergeJavaMethodsFunc(self.getInstance(), merge)

    def AddProguardMappingFile(self, mapping_file: Union[str, Path]):
        """ Add proguard mapping.txt to de-obfuscate method names.
            Raises ValueError when the native call reports failure.
        """
        if not self._AddProguardMappingFileFunc(self.getInstance(), _char_pt(str(mapping_file))):
            raise ValueError(f'failed to add proguard mapping file: {mapping_file}')

    def SetKallsymsFile(self, kallsym_file: str):
        """ Set the file path to a copy of the /proc/kallsyms file (for off device decoding).
            Raises RuntimeError when the native call reports failure.
        """
        cond: bool = self._SetKallsymsFileFunc(self.getInstance(), _char_pt(kallsym_file))
        _check(cond, 'Failed to set kallsyms file')

    def GetSupportedTraceOffCpuModes(self) -> List[str]:
        """ Get trace-offcpu modes supported by the recording file. It should be called after
            SetRecordFile(). The modes are only available for profiles recorded with --trace-offcpu
            option. All possible modes are:
              on-cpu:           report on-cpu samples with period representing time spent on cpu
              off-cpu:          report off-cpu samples with period representing time spent off cpu
              on-off-cpu:       report both on-cpu samples and off-cpu samples, which can be split
                                by event name.
              mixed-on-off-cpu: report on-cpu and off-cpu samples under the same event name.
        """
        modes_str = self._GetSupportedTraceOffCpuModesFunc(self.getInstance())
        _check(not _is_null(modes_str), 'Failed to call GetSupportedTraceOffCpuModes()')
        modes_str = _char_pt_to_str(modes_str)
        return modes_str.split(',') if modes_str else []

    def SetTraceOffCpuMode(self, mode: str):
        """ Set trace-offcpu mode. It should be called after SetRecordFile(). The mode should be
            one of the modes returned by GetSupportedTraceOffCpuModes().
            Raises RuntimeError when the native call reports failure.
        """
        res: bool = self._SetTraceOffCpuModeFunc(self.getInstance(), _char_pt(mode))
        _check(res, f'Failed to call SetTraceOffCpuMode({mode})')

    def SetSampleFilter(self, filters: List[str]):
        """ Set options used to filter samples. Available options are:
            --exclude-pid pid1,pid2,...   Exclude samples for selected processes.
            --exclude-tid tid1,tid2,...   Exclude samples for selected threads.
            --exclude-process-name process_name_regex   Exclude samples for processes with name
                                                        containing the regular expression.
            --exclude-thread-name thread_name_regex     Exclude samples for threads with name
                                                        containing the regular expression.
            --include-pid pid1,pid2,...   Include samples for selected processes.
            --include-tid tid1,tid2,...   Include samples for selected threads.
            --include-process-name process_name_regex   Include samples for processes with name
                                                        containing the regular expression.
            --include-thread-name thread_name_regex     Include samples for threads with name
                                                        containing the regular expression.
            --filter-file <file>          Use filter file to filter samples based on timestamps. The
                                          file format is in doc/sampler_filter.md.

            The filter argument should be a concatenation of options.
            Raises RuntimeError when the native call reports failure.
        """
        # Hand the options to the native side as a C array of char pointers plus its length.
        filter_array = (ct.c_char_p * len(filters))()
        filter_array[:] = [_char_pt(f) for f in filters]
        res: bool = self._SetSampleFilterFunc(self.getInstance(), filter_array, len(filters))
        _check(res, f'Failed to call SetSampleFilter({filters})')

    def GetNextSample(self) -> Optional[SampleStruct]:
        """ Return the next sample. If no more samples, return None.
            The result is also remembered as the current sample.
        """
        psample = self._GetNextSampleFunc(self.getInstance())
        if _is_null(psample):
            self.current_sample = None
        else:
            self.current_sample = psample[0]
        return self.current_sample

    def GetCurrentSample(self) -> Optional[SampleStruct]:
        """ Return the sample remembered by the last GetNextSample() call, if any. """
        return self.current_sample

    def GetEventOfCurrentSample(self) -> EventStruct:
        """ Return the event struct the native library reports for the current sample. """
        event = self._GetEventOfCurrentSampleFunc(self.getInstance())
        assert not _is_null(event)
        return event[0]

    def GetSymbolOfCurrentSample(self) -> SymbolStruct:
        """ Return the symbol struct the native library reports for the current sample. """
        symbol = self._GetSymbolOfCurrentSampleFunc(self.getInstance())
        assert not _is_null(symbol)
        return symbol[0]

    def GetCallChainOfCurrentSample(self) -> CallChainStructure:
        """ Return the callchain struct the native library reports for the current sample. """
        callchain = self._GetCallChainOfCurrentSampleFunc(self.getInstance())
        assert not _is_null(callchain)
        return callchain[0]

    def GetTracingDataOfCurrentSample(self) -> Optional[Dict[str, Any]]:
        """ Return the tracing data of the current sample as an ordered mapping from
            field name to parsed value, or None when the native library returns no
            tracing data.
        """
        data = self._GetTracingDataOfCurrentSampleFunc(self.getInstance())
        if _is_null(data):
            return None
        tracing_format = self.GetEventOfCurrentSample().tracing_data_format
        result = collections.OrderedDict()
        for i in range(tracing_format.field_count):
            field = tracing_format.fields[i]
            result[field.name] = field.parse_value(data)
        return result

    def GetBuildIdForPath(self, path: str) -> str:
        """ Return the build id string the native library reports for `path`. """
        build_id = self._GetBuildIdForPathFunc(self.getInstance(), _char_pt(path))
        assert not _is_null(build_id)
        return _char_pt_to_str(build_id)

    def GetRecordCmd(self) -> str:
        """ Return the record command line stored in the 'cmdline' feature section,
            or '' when the section is missing. Arguments are joined with spaces and
            an argument containing a space is wrapped in double quotes. The result
            is cached after the first call.
        """
        if self.record_cmd is not None:
            return self.record_cmd
        self.record_cmd = ''
        feature_data = self._GetFeatureSection(self.getInstance(), _char_pt('cmdline'))
        if not _is_null(feature_data):
            # The section is read as: u32 argument count, then for every argument
            # a u32 byte length followed by that many bytes.
            void_p = ct.cast(feature_data[0].data, ct.c_void_p)
            arg_count = ct.cast(void_p, ct.POINTER(ct.c_uint32)).contents.value
            void_p.value += 4
            args = []
            for _ in range(arg_count):
                str_len = ct.cast(void_p, ct.POINTER(ct.c_uint32)).contents.value
                void_p.value += 4
                raw_arg = ct.cast(void_p, ct.POINTER(ct.c_char))[:str_len]
                # Drop the NUL bytes and decode the argument in one go, rather than
                # decoding byte by byte (which splits multi-byte characters).
                arg = bytes_to_str(raw_arg.replace(b'\x00', b''))
                if ' ' in arg:
                    arg = '"' + arg + '"'
                args.append(arg)
                void_p.value += str_len
            self.record_cmd = ' '.join(args)
        return self.record_cmd

    def _GetFeatureString(self, feature_name: str) -> str:
        """ Return the string stored in feature section `feature_name`, or '' when
            the section is missing. The section is read as a u32 byte length
            followed by the string, which is cut at the first NUL byte.
        """
        feature_data = self._GetFeatureSection(self.getInstance(), _char_pt(feature_name))
        if _is_null(feature_data):
            return ''
        void_p = ct.cast(feature_data[0].data, ct.c_void_p)
        str_len = ct.cast(void_p, ct.POINTER(ct.c_uint32)).contents.value
        void_p.value += 4
        raw = ct.cast(void_p, ct.POINTER(ct.c_char))[:str_len]
        # Cut at the terminator on the raw bytes, then decode once.
        return bytes_to_str(raw.partition(b'\x00')[0])

    def GetArch(self) -> str:
        """ Return the string stored in the 'arch' feature section. """
        return self._GetFeatureString('arch')

    def MetaInfo(self) -> Dict[str, str]:
        """ Return a string to string map stored in meta_info section in perf.data.
            It is used to pass some short meta information.
            The map is built on the first call and cached. A trailing key without
            a value is ignored.
        """
        if self.meta_info is None:
            self.meta_info = {}
            feature_data = self._GetFeatureSection(self.getInstance(), _char_pt('meta_info'))
            if not _is_null(feature_data):
                section = feature_data[0]
                data_size = section.data_size
                raw = section.data[:data_size] if data_size else b''
                # The section is a run of NUL-terminated strings: key, value, key,
                # value, ... Whatever follows the last NUL is not terminated and is
                # dropped. Each string is decoded once as a whole.
                str_list = [bytes_to_str(piece) for piece in raw.split(b'\x00')[:-1]]
                for key, value in zip(str_list[0::2], str_list[1::2]):
                    self.meta_info[key] = value
        return self.meta_info

    def getInstance(self) -> ct._Pointer:
        """ Return the native report object; raise Exception once Close() has run. """
        if self._instance is None:
            raise Exception('Instance is Closed')
        return self._instance
516