• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2#
3# Copyright (C) 2016 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17
18"""simpleperf_report_lib.py: a python wrapper of libsimpleperf_report.so.
19   Used to access samples in perf.data.
20
21"""
22
23import collections
24from collections import namedtuple
25import ctypes as ct
26import etm_types as etm
27from pathlib import Path
28import struct
29from typing import Any, Callable, Dict, List, Optional, Tuple, Union
30
31from simpleperf_utils import (bytes_to_str, get_host_binary_path, is_windows, log_exit,
32                              str_to_bytes, ReportLibOptions)
33
34
35def _is_null(p: Optional[ct._Pointer]) -> bool:
36    if p:
37        return False
38    return ct.cast(p, ct.c_void_p).value is None
39
40
def _char_pt(s: str) -> bytes:
    """Convert `s` with str_to_bytes() so it can be handed to a native function."""
    encoded = str_to_bytes(s)
    return encoded
43
44
def _char_pt_to_str(char_pt: ct.c_char_p) -> str:
    """Convert a value read from a native char pointer to a str with bytes_to_str()."""
    text = bytes_to_str(char_pt)
    return text
47
48
49def _check(cond: bool, failmsg: str):
50    if not cond:
51        raise RuntimeError(failmsg)
52
53
class SampleStruct(ct.Structure):
    """ One sample read from perf.data.
        ip: program counter of the thread that generated the sample.
        pid: process id (or thread group id) of that thread.
        tid: thread id.
        thread_comm: thread name.
        time: time at which the sample was generated, in nanoseconds. The clock is decided
              by the --clockid option in `simpleperf record`.
        in_kernel: whether the instruction is in kernel space or user space.
        cpu: the cpu generating the sample.
        period: count of events that have happened since the last sample. For example, with
             -e cpu-cycles it is how many cpu-cycles have happened, and with -e cpu-clock it
             is how many nanoseconds have passed.
    """
    _fields_ = [
        ('ip', ct.c_uint64),
        ('pid', ct.c_uint32),
        ('tid', ct.c_uint32),
        ('_thread_comm', ct.c_char_p),
        ('time', ct.c_uint64),
        ('_in_kernel', ct.c_uint32),
        ('cpu', ct.c_uint32),
        ('period', ct.c_uint64),
    ]

    @property
    def thread_comm(self) -> str:
        """Thread name, converted from the raw `_thread_comm` field."""
        raw_comm = self._thread_comm
        return _char_pt_to_str(raw_comm)

    @property
    def in_kernel(self) -> bool:
        """True if the raw `_in_kernel` field is non-zero."""
        return self._in_kernel != 0
84
85
class TracingFieldFormatStruct(ct.Structure):
    """Format of a tracing field.
       name: name of the field.
       offset: offset of the field in tracing data.
       elem_size: size of the element type.
       elem_count: the number of elements in this field, more than one if the field is an array.
       is_signed: whether the element type is signed or unsigned.
       is_dynamic: whether the element is a dynamic string.
    """
    _fields_ = [('_name', ct.c_char_p),
                ('offset', ct.c_uint32),
                ('elem_size', ct.c_uint32),
                ('elem_count', ct.c_uint32),
                ('is_signed', ct.c_uint32),
                ('is_dynamic', ct.c_uint32)]

    # Maps an element size in bytes to the struct format character of the signed integer
    # type with that size. The upper-case character is the unsigned variant.
    _unpack_key_dict = {1: 'b', 2: 'h', 4: 'i', 8: 'q'}

    @property
    def name(self) -> str:
        return _char_pt_to_str(self._name)

    @staticmethod
    def _read_c_string(data, start: int, max_len: int) -> bytes:
        """Return the bytes of data[start:start + max_len] that precede the first NUL byte.

        If there is no NUL byte in that range, all max_len bytes are returned.
        """
        # Search for the terminator on raw bytes. Decoding byte by byte would break any
        # character that is encoded with more than one byte.
        raw = data[start:start + max_len]
        end = raw.find(b'\x00')
        return raw if end == -1 else raw[:end]

    def parse_value(
            self, data: ct.c_char_p) -> Union[str, int, bytes, Tuple[int, ...], List[bytes]]:
        """ Parse value of a field in a tracepoint event.
            data: the tracing data of the event. It must return bytes when sliced, as the
                  POINTER(c_char) returned by the native library and plain bytes both do.
            The return value depends on the type of the field:
              - a str for a dynamic string or an array of 1-byte elements,
              - an int for a single element with size 1, 2, 4 or 8,
              - a tuple of ints for an array of such elements,
              - bytes (single element) or a list of bytes (array) for any other element size.
        """
        if self.is_dynamic:
            # The field itself holds two little-endian uint16 values: where the string
            # starts in the tracing data and the maximum length to scan.
            str_offset, max_len = struct.unpack('<HH', data[self.offset:self.offset + 4])
            return bytes_to_str(self._read_c_string(data, str_offset, max_len))

        if self.elem_count > 1 and self.elem_size == 1:
            # Probably the field is a string.
            # Don't use self.is_signed, which has different values on x86 and arm.
            return bytes_to_str(self._read_c_string(data, self.offset, self.elem_count))

        start = self.offset
        unpack_key = self._unpack_key_dict.get(self.elem_size)
        if unpack_key:
            if not self.is_signed:
                unpack_key = unpack_key.upper()
            value = struct.unpack('%d%s' % (self.elem_count, unpack_key),
                                  data[start:start + self.elem_count * self.elem_size])
        else:
            # Since we don't know the element type, just return the bytes.
            value = [data[start + i * self.elem_size: start + (i + 1) * self.elem_size]
                     for i in range(self.elem_count)]
        if self.elem_count == 1:
            value = value[0]
        return value
144
145
class TracingDataFormatStruct(ct.Structure):
    """Layout of the tracing data carried by a tracepoint event, like
       https://www.kernel.org/doc/html/latest/trace/events.html#event-formats.
       size: total size of all fields in the tracing data.
       field_count: how many fields there are.
       fields: an array holding the format of each field.
    """
    _fields_ = [
        ('size', ct.c_uint32),
        ('field_count', ct.c_uint32),
        ('fields', ct.POINTER(TracingFieldFormatStruct)),
    ]
156
157
class EventStruct(ct.Structure):
    """The event type a sample belongs to.
       name: name of the event type.
       tracing_data_format: only available when it is a tracepoint event.
    """
    _fields_ = [
        ('_name', ct.c_char_p),
        ('tracing_data_format', TracingDataFormatStruct),
    ]

    @property
    def name(self) -> str:
        """Event type name, converted from the raw `_name` field."""
        raw_name = self._name
        return _char_pt_to_str(raw_name)
169
170
class MappingStruct(ct.Structure):
    """ One mapped area of a monitored thread, like an entry in /proc/<pid>/maps.
        start: start addr in memory.
        end: end addr in memory.
        pgoff: offset in the mapped shared library.
    """
    _fields_ = [
        ('start', ct.c_uint64),
        ('end', ct.c_uint64),
        ('pgoff', ct.c_uint64),
    ]
180
181
class SymbolStruct(ct.Structure):
    """ Symbol info for the instruction hit by a sample, or by one callchain entry of a sample.
        dso_name: path of the shared library containing the instruction.
        vaddr_in_file: virtual address of the instruction in the shared library.
        symbol_name: name of the function containing the instruction.
        symbol_addr: start addr of the function containing the instruction.
        symbol_len: length of the function in the shared library.
        mapping: the mapping area hit by the instruction.
    """
    _fields_ = [
        ('_dso_name', ct.c_char_p),
        ('vaddr_in_file', ct.c_uint64),
        ('_symbol_name', ct.c_char_p),
        ('symbol_addr', ct.c_uint64),
        ('symbol_len', ct.c_uint64),
        ('mapping', ct.POINTER(MappingStruct)),
    ]

    @property
    def dso_name(self) -> str:
        """Shared library path, converted from the raw `_dso_name` field."""
        raw_dso = self._dso_name
        return _char_pt_to_str(raw_dso)

    @property
    def symbol_name(self) -> str:
        """Function name, converted from the raw `_symbol_name` field."""
        raw_symbol = self._symbol_name
        return _char_pt_to_str(raw_symbol)
205
206
class CallChainEntryStructure(ct.Structure):
    """ One entry in the callchain of a sample.
        ip: the address of the instruction of the callchain entry.
        symbol: symbol info of the callchain entry.
    """
    _fields_ = [
        ('ip', ct.c_uint64),
        ('symbol', SymbolStruct),
    ]
214
215
class CallChainStructure(ct.Structure):
    """ The callchain of a sample.
        nr: number of entries in the callchain.
        entries: a pointer to an array of CallChainEntryStructure.

        For example, if a sample is generated when a thread is running function C
        with callchain function A -> function B -> function C.
        Then nr = 2, and entries = [function B, function A].
    """
    _fields_ = [
        ('nr', ct.c_uint32),
        ('entries', ct.POINTER(CallChainEntryStructure)),
    ]
227
228
class EventCounterStructure(ct.Structure):
    """ A single event counter.
        name: the name of the event.
        id: the id of the counter.
        count: the count value for corresponding counter id.
    """
    _fields_ = [
        ('_name', ct.c_char_p),
        ('id', ct.c_uint64),
        ('count', ct.c_uint64),
    ]

    @property
    def name(self) -> str:
        """Event name, converted from the raw `_name` field."""
        raw_name = self._name
        return _char_pt_to_str(raw_name)
242
class EventCountersViewStructure(ct.Structure):
    """ A view over an array of event counters.
        nr: number of event counters in the array.
        event_counter: a pointer to an array of EventCounterStructure.
    """
    _fields_ = [
        ('nr', ct.c_size_t),
        ('event_counter', ct.POINTER(EventCounterStructure)),
    ]
250
251
252
class FeatureSectionStructure(ct.Structure):
    """ A feature section of perf.data, which stores information like record cmd, device arch, etc.
        data: a pointer to a buffer storing the section data.
        data_size: data size in bytes.
    """
    _fields_ = [
        ('data', ct.POINTER(ct.c_char)),
        ('data_size', ct.c_uint32),
    ]
260
261
class BuildIdPairStructure(ct.Structure):
    """ Pairs a build id buffer with the name of the file it belongs to.
        build_id: a pointer to the raw bytes of the build id.
        filename: name of the file, as a C string.
    """
    _fields_ = [
        ("build_id", ct.POINTER(ct.c_char)),
        ("filename", ct.c_char_p),
    ]
265
266
class DsoAddress(ct.Structure):
    """ An address expressed as a DSO path plus an offset.
        path: path of the DSO, as a C string.
        offset: offset inside the DSO.
    """
    _fields_ = [
        ('path', ct.c_char_p),
        ('offset', ct.c_uint64),
    ]
270
271
class Thread(ct.Structure):
    """ Identity of a thread.
        pid: process id.
        tid: thread id.
        comm: thread name, as a C string.
    """
    _fields_ = [
        ('pid', ct.c_int),
        ('tid', ct.c_int),
        ('comm', ct.c_char_p),
    ]
276
277
class ReportLibStructure(ct.Structure):
    """ Opaque type with no fields. It is only used through ct.POINTER(ReportLibStructure),
        to type the handle of the native report lib instance.
    """
    _fields_ = []
280
281
def SetReportOptionsForReportLib(report_lib, options: ReportLibOptions):
    """ Apply the options that are set in `options` to `report_lib`.
        Unset (falsy) options are skipped. The corresponding methods of report_lib are
        called in a fixed order: AddProguardMappingFile, ShowArtFrames, RemoveMethod,
        SetTraceOffCpuMode, SetSampleFilter, AggregateThreads.
    """
    for mapping_file in options.proguard_mapping_files or ():
        report_lib.AddProguardMappingFile(mapping_file)

    if options.show_art_frames:
        report_lib.ShowArtFrames(True)

    for method_name in options.remove_method or ():
        report_lib.RemoveMethod(method_name)

    offcpu_mode = options.trace_offcpu
    if offcpu_mode:
        report_lib.SetTraceOffCpuMode(offcpu_mode)

    filters = options.sample_filters
    if filters:
        report_lib.SetSampleFilter(filters)

    thread_regexes = options.aggregate_threads
    if thread_regexes:
        report_lib.AggregateThreads(thread_regexes)
297
298
299# pylint: disable=invalid-name
class ReportLib(object):
    """ Read contents from perf.data.

        This class wraps the functions exported by libsimpleperf_report.so with ctypes.
        Samples are iterated with GetNextSample(). Call Close() to destroy the native
        instance. After that, methods calling into the native library raise an exception.
    """

    def __init__(self, native_lib_path: Optional[str] = None):
        """ Load the native library, bind its functions and create a native instance.
            native_lib_path: path of the native library. If None, it is looked up with
                             get_host_binary_path('libsimpleperf_report.so').
            Raises RuntimeError if the native library returns a null instance.
        """
        # Set first, so Close() is safe even if loading the library fails below.
        self._instance = None
        if native_lib_path is None:
            native_lib_path = self._get_native_lib()

        self._load_dependent_lib()
        self._lib = ct.CDLL(native_lib_path)

        # Functions without an explicit restype use the ctypes default (c_int).
        # NOTE(review): SetLogSeverity, SetSymfs, SetRecordFile and SetKallsymsFile are used
        # as if they return bool but have no restype set - confirm against the C interface.
        self._CreateReportLibFunc = self._lib.CreateReportLib
        self._CreateReportLibFunc.restype = ct.POINTER(ReportLibStructure)
        self._DestroyReportLibFunc = self._lib.DestroyReportLib
        self._SetLogSeverityFunc = self._lib.SetLogSeverity
        self._SetSymfsFunc = self._lib.SetSymfs
        self._SetRecordFileFunc = self._lib.SetRecordFile
        self._SetKallsymsFileFunc = self._lib.SetKallsymsFile
        self._ShowIpForUnknownSymbolFunc = self._lib.ShowIpForUnknownSymbol
        self._ShowArtFramesFunc = self._lib.ShowArtFrames
        self._RemoveMethodFunc = self._lib.RemoveMethod
        self._RemoveMethodFunc.restype = ct.c_bool
        self._MergeJavaMethodsFunc = self._lib.MergeJavaMethods
        self._AddProguardMappingFileFunc = self._lib.AddProguardMappingFile
        self._AddProguardMappingFileFunc.restype = ct.c_bool
        self._GetSupportedTraceOffCpuModesFunc = self._lib.GetSupportedTraceOffCpuModes
        self._GetSupportedTraceOffCpuModesFunc.restype = ct.c_char_p
        self._SetTraceOffCpuModeFunc = self._lib.SetTraceOffCpuMode
        self._SetTraceOffCpuModeFunc.restype = ct.c_bool
        self._SetSampleFilterFunc = self._lib.SetSampleFilter
        self._SetSampleFilterFunc.restype = ct.c_bool
        self._AggregateThreadsFunc = self._lib.AggregateThreads
        self._AggregateThreadsFunc.restype = ct.c_bool
        self._GetNextSampleFunc = self._lib.GetNextSample
        self._GetNextSampleFunc.restype = ct.POINTER(SampleStruct)
        self._GetEventOfCurrentSampleFunc = self._lib.GetEventOfCurrentSample
        self._GetEventOfCurrentSampleFunc.restype = ct.POINTER(EventStruct)
        self._GetSymbolOfCurrentSampleFunc = self._lib.GetSymbolOfCurrentSample
        self._GetSymbolOfCurrentSampleFunc.restype = ct.POINTER(SymbolStruct)
        self._GetCallChainOfCurrentSampleFunc = self._lib.GetCallChainOfCurrentSample
        self._GetCallChainOfCurrentSampleFunc.restype = ct.POINTER(CallChainStructure)
        self._GetEventCountersOfCurrentSampleFunc = self._lib.GetEventCountersOfCurrentSample
        self._GetEventCountersOfCurrentSampleFunc.restype = ct.POINTER(EventCountersViewStructure)
        self._GetTracingDataOfCurrentSampleFunc = self._lib.GetTracingDataOfCurrentSample
        self._GetTracingDataOfCurrentSampleFunc.restype = ct.POINTER(ct.c_char)
        self._GetProcessNameOfCurrentSampleFunc = self._lib.GetProcessNameOfCurrentSample
        self._GetProcessNameOfCurrentSampleFunc.restype = ct.c_char_p
        self._GetBuildIdForPathFunc = self._lib.GetBuildIdForPath
        self._GetBuildIdForPathFunc.restype = ct.c_char_p
        self._GetFeatureSection = self._lib.GetFeatureSection
        self._GetFeatureSection.restype = ct.POINTER(FeatureSectionStructure)
        self._GetAllBuildIds = self._lib.GetAllBuildIds
        self._GetAllBuildIds.restype = ct.POINTER(BuildIdPairStructure)
        self._ETMCallbackType = ct.CFUNCTYPE(
            None, ct.c_uint8, ct.POINTER(etm.GenericTraceElement))
        self._SetETMCallback = self._lib.SetETMCallback
        self._SetETMCallback.argtypes = [ct.POINTER(ReportLibStructure), self._ETMCallbackType]
        self._ConvertETMAddressToVaddrInFile = self._lib.ConvertETMAddressToVaddrInFile
        self._ConvertETMAddressToVaddrInFile.restype = DsoAddress
        self._ConvertETMAddressToVaddrInFile.argtypes = [
            ct.POINTER(ReportLibStructure), ct.c_uint8, ct.c_uint64]
        self._GetThread = self._lib.GetThread
        self._GetThread.restype = Thread
        self._GetThread.argtypes = [ct.POINTER(ReportLibStructure), ct.c_int]
        self._ReadSymbolsForPath = self._lib.ReadSymbolsForPath
        self._ReadSymbolsForPath.restype = ct.POINTER(SymbolStruct)
        self._instance = self._CreateReportLibFunc()
        # Use _check() instead of assert, so the validation also runs under `python -O`.
        _check(not _is_null(self._instance), 'Failed to create ReportLib instance')

        # Caches filled lazily by MetaInfo() and GetRecordCmd().
        self.meta_info: Optional[Dict[str, str]] = None
        self.current_sample: Optional[SampleStruct] = None
        self.record_cmd: Optional[str] = None
        # Keeps the ctypes callback object created in SetETMCallback() alive.
        self.callback: Optional[ct._FuncPointer] = None

    def _get_native_lib(self) -> str:
        """ Return the default path of the native report library. """
        return get_host_binary_path('libsimpleperf_report.so')

    def _load_dependent_lib(self):
        """ Load libraries that must be loaded before the native report library. """
        # As the windows dll is built with mingw we need to load 'libwinpthread-1.dll'.
        if is_windows():
            self._libwinpthread = ct.CDLL(get_host_binary_path('libwinpthread-1.dll'))

    def Close(self):
        """ Destroy the native instance. Calling Close() more than once is harmless. """
        if self._instance:
            self._DestroyReportLibFunc(self._instance)
            self._instance = None

    def SetReportOptions(self, options: ReportLibOptions):
        """ Set report options in one call. """
        SetReportOptionsForReportLib(self, options)

    def SetLogSeverity(self, log_level: str = 'info'):
        """ Set log severity of native lib, can be verbose,debug,info,error,fatal."""
        cond: bool = self._SetLogSeverityFunc(self.getInstance(), _char_pt(log_level))
        _check(cond, 'Failed to set log level')

    def SetSymfs(self, symfs_dir: str):
        """ Set directory used to find symbols."""
        cond: bool = self._SetSymfsFunc(self.getInstance(), _char_pt(symfs_dir))
        _check(cond, 'Failed to set symbols directory')

    def SetRecordFile(self, record_file: str):
        """ Set the path of record file, like perf.data."""
        cond: bool = self._SetRecordFileFunc(self.getInstance(), _char_pt(record_file))
        _check(cond, 'Failed to set record file')

    def ShowIpForUnknownSymbol(self):
        """ Call ShowIpForUnknownSymbol() of the native library on this instance. """
        self._ShowIpForUnknownSymbolFunc(self.getInstance())

    def ShowArtFrames(self, show: bool = True):
        """ Show frames of internal methods of the Java interpreter. """
        self._ShowArtFramesFunc(self.getInstance(), show)

    def RemoveMethod(self, method_name_regex: str):
        """ Remove methods with name containing method_name_regex. """
        res = self._RemoveMethodFunc(self.getInstance(), _char_pt(method_name_regex))
        _check(res, f'failed to call RemoveMethod({method_name_regex})')

    def MergeJavaMethods(self, merge: bool = True):
        """ This option merges jitted java methods with the same name but in different jit
            symfiles. If possible, it also merges jitted methods with interpreted methods,
            by mapping jitted methods to their corresponding dex files.
            Side effects:
              It only works at method level, not instruction level.
              It makes symbol.vaddr_in_file and symbol.mapping not accurate for jitted methods.
            Java methods are merged by default.
        """
        self._MergeJavaMethodsFunc(self.getInstance(), merge)

    def AddProguardMappingFile(self, mapping_file: Union[str, Path]):
        """ Add proguard mapping.txt to de-obfuscate method names.
            Raises ValueError if the native library fails to add the file.
        """
        if not self._AddProguardMappingFileFunc(self.getInstance(), _char_pt(str(mapping_file))):
            raise ValueError(f'failed to add proguard mapping file: {mapping_file}')

    def SetKallsymsFile(self, kallsym_file: str):
        """ Set the file path to a copy of the /proc/kallsyms file (for off device decoding) """
        cond: bool = self._SetKallsymsFileFunc(self.getInstance(), _char_pt(kallsym_file))
        _check(cond, 'Failed to set kallsyms file')

    def GetSupportedTraceOffCpuModes(self) -> List[str]:
        """ Get trace-offcpu modes supported by the recording file. It should be called after
            SetRecordFile(). The modes are only available for profiles recorded with --trace-offcpu
            option. All possible modes are:
              on-cpu:           report on-cpu samples with period representing time spent on cpu
              off-cpu:          report off-cpu samples with period representing time spent off cpu
              on-off-cpu:       report both on-cpu samples and off-cpu samples, which can be split
                                by event name.
              mixed-on-off-cpu: report on-cpu and off-cpu samples under the same event name.
        """
        raw_modes = self._GetSupportedTraceOffCpuModesFunc(self.getInstance())
        _check(not _is_null(raw_modes), 'Failed to call GetSupportedTraceOffCpuModes()')
        # The native library returns the modes as one comma separated string.
        modes_str = _char_pt_to_str(raw_modes)
        return modes_str.split(',') if modes_str else []

    def SetTraceOffCpuMode(self, mode: str):
        """ Set trace-offcpu mode. It should be called after SetRecordFile(). The mode should be
            one of the modes returned by GetSupportedTraceOffCpuModes().
        """
        res: bool = self._SetTraceOffCpuModeFunc(self.getInstance(), _char_pt(mode))
        _check(res, f'Failed to call SetTraceOffCpuMode({mode})')

    def SetSampleFilter(self, filters: List[str]):
        """ Set options used to filter samples. Available options are:
            --exclude-pid pid1,pid2,...   Exclude samples for selected processes.
            --exclude-tid tid1,tid2,...   Exclude samples for selected threads.
            --exclude-process-name process_name_regex   Exclude samples for processes with name
                                                        containing the regular expression.
            --exclude-thread-name thread_name_regex     Exclude samples for threads with name
                                                        containing the regular expression.
            --include-pid pid1,pid2,...   Include samples for selected processes.
            --include-tid tid1,tid2,...   Include samples for selected threads.
            --include-process-name process_name_regex   Include samples for processes with name
                                                        containing the regular expression.
            --include-thread-name thread_name_regex     Include samples for threads with name
                                                        containing the regular expression.
            --filter-file <file>          Use filter file to filter samples based on timestamps. The
                                          file format is in doc/sampler_filter.md.

            The filter argument should be a concatenation of options.
        """
        # Pass the options to the native library as a C array of char pointers.
        filter_array = (ct.c_char_p * len(filters))()
        filter_array[:] = [_char_pt(f) for f in filters]
        res: bool = self._SetSampleFilterFunc(self.getInstance(), filter_array, len(filters))
        _check(res, f'Failed to call SetSampleFilter({filters})')

    def AggregateThreads(self, thread_name_regex_list: List[str]):
        """ Given a list of thread name regex, threads with names matching the same regex are merged
            into one thread. As a result, samples from different threads (like a thread pool) can be
            shown in one flamegraph.
        """
        regex_array = (ct.c_char_p * len(thread_name_regex_list))()
        regex_array[:] = [_char_pt(f) for f in thread_name_regex_list]
        res: bool = self._AggregateThreadsFunc(
            self.getInstance(),
            regex_array, len(thread_name_regex_list))
        _check(res, f'Failed to call AggregateThreads({thread_name_regex_list})')

    def GetNextSample(self) -> Optional[SampleStruct]:
        """ Return the next sample. If no more samples, return None. """
        psample = self._GetNextSampleFunc(self.getInstance())
        self.current_sample = None if _is_null(psample) else psample[0]
        return self.current_sample

    def GetCurrentSample(self) -> Optional[SampleStruct]:
        """ Return the sample returned by the last GetNextSample() call (None if there is none). """
        return self.current_sample

    def GetEventOfCurrentSample(self) -> EventStruct:
        """ Return the event type of the current sample.
            Raises RuntimeError if the native library returns a null pointer.
        """
        event = self._GetEventOfCurrentSampleFunc(self.getInstance())
        _check(not _is_null(event), 'Failed to call GetEventOfCurrentSample()')
        return event[0]

    def GetSymbolOfCurrentSample(self) -> SymbolStruct:
        """ Return symbol info of the current sample.
            Raises RuntimeError if the native library returns a null pointer.
        """
        symbol = self._GetSymbolOfCurrentSampleFunc(self.getInstance())
        _check(not _is_null(symbol), 'Failed to call GetSymbolOfCurrentSample()')
        return symbol[0]

    def GetCallChainOfCurrentSample(self) -> CallChainStructure:
        """ Return callchain info of the current sample.
            Raises RuntimeError if the native library returns a null pointer.
        """
        callchain = self._GetCallChainOfCurrentSampleFunc(self.getInstance())
        _check(not _is_null(callchain), 'Failed to call GetCallChainOfCurrentSample()')
        return callchain[0]

    def GetEventCountersOfCurrentSample(self) -> EventCountersViewStructure:
        """ Return the event counters of the current sample.
            Raises RuntimeError if the native library returns a null pointer.
        """
        event_counters = self._GetEventCountersOfCurrentSampleFunc(self.getInstance())
        _check(not _is_null(event_counters), 'Failed to call GetEventCountersOfCurrentSample()')
        return event_counters[0]

    def GetTracingDataOfCurrentSample(self) -> Optional[Dict[str, Any]]:
        """ Return the tracing data of the current sample as an ordered mapping from field
            name to parsed field value. Return None if the sample has no tracing data.
        """
        data = self._GetTracingDataOfCurrentSampleFunc(self.getInstance())
        if _is_null(data):
            return None
        data_format = self.GetEventOfCurrentSample().tracing_data_format
        result = collections.OrderedDict()
        for i in range(data_format.field_count):
            field = data_format.fields[i]
            result[field.name] = field.parse_value(data)
        return result

    def GetProcessNameOfCurrentSample(self) -> str:
        """ Return the process name of the current sample. """
        return _char_pt_to_str(self._GetProcessNameOfCurrentSampleFunc(self.getInstance()))

    def GetBuildIdForPath(self, path: str) -> str:
        """ Return the build id the native library reports for path.
            Raises RuntimeError if the native library returns a null pointer.
        """
        build_id = self._GetBuildIdForPathFunc(self.getInstance(), _char_pt(path))
        _check(not _is_null(build_id), f'Failed to call GetBuildIdForPath({path})')
        return _char_pt_to_str(build_id)

    def GetRecordCmd(self) -> str:
        """ Return the record command line stored in the 'cmdline' feature section.
            Arguments are joined by spaces, and an argument containing a space is wrapped in
            double quotes. Return '' if the section doesn't exist. The result is cached.
        """
        if self.record_cmd is not None:
            return self.record_cmd
        self.record_cmd = ''
        feature_data = self._GetFeatureSection(self.getInstance(), _char_pt('cmdline'))
        if not _is_null(feature_data):
            # Section layout as read here: a uint32 argument count, then for each argument
            # a uint32 length followed by that many bytes.
            void_p = ct.cast(feature_data[0].data, ct.c_void_p)
            arg_count = ct.cast(void_p, ct.POINTER(ct.c_uint32)).contents.value
            void_p.value += 4
            args = []
            for _ in range(arg_count):
                str_len = ct.cast(void_p, ct.POINTER(ct.c_uint32)).contents.value
                void_p.value += 4
                char_p = ct.cast(void_p, ct.POINTER(ct.c_char))
                # Read the argument in one slice and decode it as a whole. NUL bytes
                # (padding/terminators inside the str_len bytes) are dropped.
                current_str = bytes_to_str(char_p[0:str_len].replace(b'\0', b''))
                if ' ' in current_str:
                    current_str = '"' + current_str + '"'
                args.append(current_str)
                void_p.value += str_len
            self.record_cmd = ' '.join(args)
        return self.record_cmd

    def _GetFeatureString(self, feature_name: str) -> str:
        """ Return the string stored in feature section feature_name, or '' if the section
            doesn't exist. The section is read as a uint32 length followed by the string.
        """
        feature_data = self._GetFeatureSection(self.getInstance(), _char_pt(feature_name))
        if _is_null(feature_data):
            return ''
        void_p = ct.cast(feature_data[0].data, ct.c_void_p)
        str_len = ct.cast(void_p, ct.POINTER(ct.c_uint32)).contents.value
        void_p.value += 4
        char_p = ct.cast(void_p, ct.POINTER(ct.c_char))
        # The string ends at the first NUL byte, or after str_len bytes if there is none.
        raw = char_p[0:str_len].split(b'\0', 1)[0]
        return bytes_to_str(raw)

    def GetArch(self) -> str:
        """ Return the string stored in the 'arch' feature section. """
        return self._GetFeatureString('arch')

    def MetaInfo(self) -> Dict[str, str]:
        """ Return a string to string map stored in meta_info section in perf.data.
            It is used to pass some short meta information.
            The result is cached. A trailing key without a value is ignored.
        """
        if self.meta_info is None:
            self.meta_info = {}
            feature_data = self._GetFeatureSection(self.getInstance(), _char_pt('meta_info'))
            if not _is_null(feature_data):
                section = feature_data[0]
                raw = section.data[0:section.data_size]
                # Each string is NUL terminated, so whatever follows the last NUL is either
                # empty or an unterminated fragment. Drop it.
                strings = [bytes_to_str(s) for s in raw.split(b'\0')[:-1]]
                # Strings alternate between key and value. zip() stops at the shorter list,
                # so a dangling key is skipped instead of raising IndexError.
                self.meta_info.update(zip(strings[0::2], strings[1::2]))
        return self.meta_info

    def getInstance(self) -> ct._Pointer:
        """ Return the native instance. Raises Exception if Close() has been called. """
        if self._instance is None:
            raise Exception('Instance is Closed')
        return self._instance

    def GetAllBuildIds(self) -> Dict[str, str]:
        """Return a dictionary mapping all filenames to their build ids.
        """
        ids = self._GetAllBuildIds(self.getInstance())
        if not ids:
            return {}

        result = {}
        i = 0
        # The array is terminated by an entry with a null filename.
        while ids[i].filename:
            filename = _char_pt_to_str(ids[i].filename)
            # A build_id is always 20 bytes long.
            build_id = f"0x{ids[i].build_id[0:20].hex()}"
            result[filename] = build_id
            i += 1

        return result

    def SetETMCallback(self, callback: Callable[[int, etm.GenericTraceElement], None]) -> None:
        """Set the callback to be called while decoding ETM traces. The callback will be called
           for every element. The callback will receive the following parameters:
           trace_id: CoreSight Trace ID that identifies the trace source.
           elem: the decoded element as etm_types.GenericTraceElement.
        """
        def inner(trace_id, elem):
            # The native library passes a pointer; hand the pointed-to element to the callback.
            callback(trace_id, elem.contents)

        # Save the callback, preventing GC from taking it.
        self.callback = self._ETMCallbackType(inner)
        self._SetETMCallback(self.getInstance(), self.callback)

    def ConvertETMAddressToVaddrInFile(self, trace_id: int, addr: int) -> Optional[Tuple[str, int]]:
        """Given the trace id and a virtual address in an ETM trace, return a tuple containing the
           path to the DSO and the offset inside it. If the address is not mapped, return
           None.
        """
        v = self._ConvertETMAddressToVaddrInFile(self.getInstance(), trace_id, addr)
        if not v.path:
            return None
        return (_char_pt_to_str(v.path), v.offset)

    def GetThread(self, tid: int) -> Optional[Tuple[int, int, str]]:
        """Return a tuple containing PID, TID and comm for the given TID. If the thread is not
           found, return None.
        """
        r = self._GetThread(self.getInstance(), tid)
        # A pid of -1 marks a thread that was not found.
        if r.pid == -1:
            return None
        return (r.pid, r.tid, _char_pt_to_str(r.comm))

    def GetSymbols(self, path: str) -> Optional[List[Tuple[int, int, str]]]:
        """Return a list of symbols for path, in the form of tuples of start address,
           length and name. Return None if the native library returns no symbol array.
        """
        symbols = self._ReadSymbolsForPath(self.getInstance(), _char_pt(path))
        if not symbols:
            return None

        i = 0
        result = []
        # The array is terminated by an entry with a null symbol name.
        while symbols[i]._symbol_name:
            result.append((symbols[i].symbol_addr,
                           symbols[i].symbol_len,
                           symbols[i].symbol_name))
            i += 1

        return result
687
688
# Plain record types handed out by ProtoFileReportLib for the current sample.
ProtoSample = namedtuple(
    'ProtoSample',
    ['ip', 'pid', 'tid', 'thread_comm', 'time', 'in_kernel', 'cpu', 'period'],
)
ProtoEvent = namedtuple(
    'ProtoEvent',
    ['name', 'tracing_data_format'],
)
ProtoSymbol = namedtuple(
    'ProtoSymbol',
    ['dso_name', 'vaddr_in_file', 'symbol_name', 'symbol_addr', 'symbol_len', 'mapping'],
)
ProtoMapping = namedtuple(
    'ProtoMapping',
    ['start', 'end', 'pgoff'],
)
ProtoCallChain = namedtuple(
    'ProtoCallChain',
    ['nr', 'entries'],
)
ProtoCallChainEntry = namedtuple(
    'ProtoCallChainEntry',
    ['ip', 'symbol'],
)
698
699
class ProtoFileReportLib:
    """ Read contents from profile in cmd_report_sample.proto format.
        It is generated by `simpleperf report-sample`.

        SetRecordFile() parses the whole file up front. Samples are then visited one at a time
        with GetNextSample(); the Get*OfCurrentSample() methods describe the sample at the head
        of the internal sample queue.
    """

    # Every trace-offcpu mode accepted by SetTraceOffCpuMode().
    _TRACE_OFFCPU_MODES = ('on-cpu', 'off-cpu', 'on-off-cpu', 'mixed-on-off-cpu')

    @staticmethod
    def is_supported_format(record_file: str) -> bool:
        """ Return True if record_file starts with the b'SIMPLEPERF' magic, otherwise False. """
        with open(record_file, 'rb') as fh:
            return fh.read(10) == b'SIMPLEPERF'

    @staticmethod
    def get_report_sample_pb2():
        """ Import and return the report_sample_pb2 module.
            If the import fails, log_exit() is called with an installation hint.
        """
        try:
            import report_sample_pb2
            return report_sample_pb2
        except ImportError as e:
            log_exit(f'{e}\nprotobuf package is missing or too old. Please install it like ' +
                     '`pip install protobuf==4.21`.')

    def __init__(self):
        self.record_file = None
        self.report_sample_pb2 = ProtoFileReportLib.get_report_sample_pb2()
        # sample and context_switch records, in file order
        self.records: List[self.report_sample_pb2.Record] = []
        # index into self.records of the record processed last; -1 before the first one
        self.record_index = -1
        self.files: List[self.report_sample_pb2.File] = []
        # mapping from thread id to its thread record
        self.thread_map: Dict[int, self.report_sample_pb2.Thread] = {}
        self.meta_info: Optional[self.report_sample_pb2.MetaInfo] = None
        # fake mapping start of each entry in self.files, see _build_symbol()
        self.fake_mapping_starts = []
        # samples ready to be reported; the head is the current sample
        self.sample_queue: collections.deque = collections.deque()
        self.trace_offcpu_mode = None
        # mapping from thread id to the last off-cpu sample in the thread
        self.offcpu_samples = {}

    def Close(self):
        """ No-op for report_sample profiles. """
        pass

    def SetReportOptions(self, options: ReportLibOptions):
        """ Set report options in one call. """
        SetReportOptionsForReportLib(self, options)

    def SetLogSeverity(self, log_level: str = 'info'):
        """ No-op for report_sample profiles. """
        pass

    def SetSymfs(self, symfs_dir: str):
        """ No-op for report_sample profiles. """
        pass

    def SetRecordFile(self, record_file: str):
        """ Parse record_file and store its records in this object.

            The file layout read here is: the 10-byte magic b'SIMPLEPERF', a little-endian
            uint16 version (must be 1), then a sequence of records, each prefixed by a
            little-endian uint32 size. A size of 0 ends the sequence.

            Raises RuntimeError if the magic, the version or a record boundary is invalid.
        """
        self.record_file = record_file
        with open(record_file, 'rb') as fh:
            data = fh.read()
        _check(data[:10] == b'SIMPLEPERF', f'magic number mismatch: {data[:10]}')
        # Make a truncated header fail with the same error type as other format problems.
        _check(len(data) >= 12, 'data format error')
        version = struct.unpack_from('<H', data, 10)[0]
        _check(version == 1, f'version mismatch: {version}')
        i = 12
        while i < len(data):
            _check(i + 4 <= len(data), 'data format error')
            size = struct.unpack_from('<I', data, i)[0]
            if size == 0:
                break
            i += 4
            _check(i + size <= len(data), 'data format error')
            record = self.report_sample_pb2.Record()
            record.ParseFromString(data[i: i + size])
            i += size
            if record.HasField('sample') or record.HasField('context_switch'):
                self.records.append(record)
            elif record.HasField('file'):
                self.files.append(record.file)
            elif record.HasField('thread'):
                self.thread_map[record.thread.thread_id] = record.thread
            elif record.HasField('meta_info'):
                self.meta_info = record.meta_info
                if self.meta_info.trace_offcpu:
                    # Default mode for profiles recorded with trace-offcpu.
                    self.trace_offcpu_mode = 'mixed-on-off-cpu'
        # Give each file its own range of len(file.symbol) + 1 fake addresses.
        fake_mapping_start = 0
        for file in self.files:
            self.fake_mapping_starts.append(fake_mapping_start)
            fake_mapping_start += len(file.symbol) + 1

    def AddProguardMappingFile(self, mapping_file: Union[str, Path]):
        """ Add proguard mapping.txt to de-obfuscate method names. """
        raise NotImplementedError(
            'Adding proguard mapping files are not implemented for report_sample profiles')

    def ShowIpForUnknownSymbol(self):
        """ No-op for report_sample profiles. """
        pass

    def ShowArtFrames(self, show: bool = True):
        """ Not implemented for report_sample profiles; always raises NotImplementedError. """
        raise NotImplementedError(
            'Showing art frames are not implemented for report_sample profiles')

    def RemoveMethod(self, method_name_regex: str):
        """ Remove methods with name containing method_name_regex. """
        raise NotImplementedError("Removing method isn't implemented for report_sample profiles")

    def SetSampleFilter(self, filters: List[str]):
        """ Not implemented for report_sample profiles; always raises NotImplementedError. """
        raise NotImplementedError('sample filters are not implemented for report_sample profiles')

    def GetSupportedTraceOffCpuModes(self) -> List[str]:
        """ Get trace-offcpu modes supported by the recording file. It should be called after
            SetRecordFile(). The modes are only available for profiles recorded with --trace-offcpu
            option. All possible modes are:
              on-cpu:           report on-cpu samples with period representing time spent on cpu
              off-cpu:          report off-cpu samples with period representing time spent off cpu
              on-off-cpu:       report both on-cpu samples and off-cpu samples, which can be split
                                by event name.
              mixed-on-off-cpu: report on-cpu and off-cpu samples under the same event name.
        """
        _check(self.meta_info is not None,
               'GetSupportedTraceOffCpuModes() should be called after SetRecordFile()')
        if self.meta_info.trace_offcpu:
            return list(self._TRACE_OFFCPU_MODES)
        return []

    def SetTraceOffCpuMode(self, mode: str):
        """ Set trace-offcpu mode. It should be called after SetRecordFile().
            Raises RuntimeError if mode is not one of the known modes.
        """
        _check(mode in self._TRACE_OFFCPU_MODES, 'invalid mode')
        # Don't check if mode is in self.GetSupportedTraceOffCpuModes(). Because the profile may
        # be generated by an old simpleperf.
        self.trace_offcpu_mode = mode

    def AggregateThreads(self, thread_name_regex_list: List[str]):
        """ Given a list of thread name regex, threads with names matching the same regex are merged
            into one thread. As a result, samples from different threads (like a thread pool) can be
            shown in one flamegraph.
        """
        raise NotImplementedError(
            'Aggregating threads are not implemented for report_sample profiles')

    def GetNextSample(self) -> Optional[ProtoSample]:
        """ Advance to the next sample and return it, or None when no sample is left.

            The current sample (if any) is dropped from the queue. If the queue is then empty,
            records are processed until at least one sample is queued or the records run out.
        """
        if self.sample_queue:
            self.sample_queue.popleft()
        while not self.sample_queue:
            self.record_index += 1
            if self.record_index >= len(self.records):
                break
            record = self.records[self.record_index]
            if record.HasField('sample'):
                self._process_sample_record(record.sample)
            elif record.HasField('context_switch'):
                self._process_context_switch(record.context_switch)
        return self.GetCurrentSample()

    def _process_sample_record(self, sample) -> None:
        """ Queue the samples that become reportable because of `sample`.

            Without a trace-offcpu mode every sample is queued unchanged. Otherwise a sample
            whose event name contains 'sched_switch' is treated as an off-cpu sample.
        """
        if not self.trace_offcpu_mode:
            self._add_to_sample_queue(sample)
            return
        event_name = self._get_event_name(sample.event_type_id)
        is_offcpu = 'sched_switch' in event_name

        if self.trace_offcpu_mode == 'on-cpu':
            if not is_offcpu:
                self._add_to_sample_queue(sample)
            return

        if prev_offcpu_sample := self.offcpu_samples.get(sample.thread_id):
            # If there is a previous off-cpu sample, update its period.
            prev_offcpu_sample.event_count = max(sample.time - prev_offcpu_sample.time, 1)
            self._add_to_sample_queue(prev_offcpu_sample)

        if is_offcpu:
            # Hold the sample back until the next event of this thread gives its period.
            self.offcpu_samples[sample.thread_id] = sample
        else:
            self.offcpu_samples[sample.thread_id] = None
            if self.trace_offcpu_mode in ('on-off-cpu', 'mixed-on-off-cpu'):
                self._add_to_sample_queue(sample)

    def _process_context_switch(self, context_switch) -> None:
        """ On a switch-on record, finish and queue the thread's pending off-cpu sample. """
        if not context_switch.switch_on:
            return
        if prev_offcpu_sample := self.offcpu_samples.get(context_switch.thread_id):
            prev_offcpu_sample.event_count = max(context_switch.time - prev_offcpu_sample.time, 1)
            self.offcpu_samples[context_switch.thread_id] = None
            self._add_to_sample_queue(prev_offcpu_sample)

    def _add_to_sample_queue(self, sample) -> None:
        """ Append sample to the queue of samples waiting to be reported. """
        self.sample_queue.append(sample)

    def GetCurrentSample(self) -> Optional[ProtoSample]:
        """ Return the sample at the head of the queue as a ProtoSample, or None if the queue
            is empty. ip and cpu are always 0 and in_kernel is always False.
        """
        if not self.sample_queue:
            return None
        sample = self.sample_queue[0]
        thread = self.thread_map[sample.thread_id]
        return ProtoSample(
            ip=0, pid=thread.process_id, tid=thread.thread_id, thread_comm=thread.thread_name,
            time=sample.time, in_kernel=False, cpu=0, period=sample.event_count)

    def GetEventOfCurrentSample(self) -> ProtoEvent:
        """ Return the event of the current sample. In 'mixed-on-off-cpu' mode the name of
            event type 0 is used for every sample.
        """
        sample = self.sample_queue[0]
        event_type_id = 0 if self.trace_offcpu_mode == 'mixed-on-off-cpu' else sample.event_type_id
        event_name = self._get_event_name(event_type_id)
        return ProtoEvent(name=event_name, tracing_data_format=None)

    def _get_event_name(self, event_type_id: int) -> str:
        """ Return the entry at index event_type_id of meta_info.event_type. """
        return self.meta_info.event_type[event_type_id]

    def GetSymbolOfCurrentSample(self) -> ProtoSymbol:
        """ Return the symbol of the first callchain node of the current sample. """
        sample = self.sample_queue[0]
        node = sample.callchain[0]
        return self._build_symbol(node)

    def GetCallChainOfCurrentSample(self) -> ProtoCallChain:
        """ Return the callchain of the current sample, excluding its first node (which is
            reported by GetSymbolOfCurrentSample()). Each entry has ip 0.
        """
        sample = self.sample_queue[0]
        entries = [ProtoCallChainEntry(ip=0, symbol=self._build_symbol(node))
                   for node in sample.callchain[1:]]
        return ProtoCallChain(nr=len(entries), entries=entries)

    def _build_symbol(self, node) -> ProtoSymbol:
        """ Build a ProtoSymbol for a callchain node.

            The profile carries no real mappings, so a fake one is made up: its start is an
            offset from the start reserved for the node's file in SetRecordFile(). A
            symbol_id of -1 yields the name 'unknown' with page offset 0.
        """
        file = self.files[node.file_id]
        # Read-only lookup: the table is shared by all symbols of the file.
        fake_mapping_start = self.fake_mapping_starts[node.file_id]
        if node.symbol_id == -1:
            symbol_name = 'unknown'
            fake_symbol_addr = fake_mapping_start + len(file.symbol)
            fake_symbol_pgoff = 0
        else:
            symbol_name = file.symbol[node.symbol_id]
            fake_symbol_addr = fake_mapping_start + node.symbol_id + 1
            fake_symbol_pgoff = node.symbol_id + 1
        mapping = ProtoMapping(fake_symbol_addr, 1, fake_symbol_pgoff)
        return ProtoSymbol(dso_name=file.path, vaddr_in_file=node.vaddr_in_file,
                           symbol_name=symbol_name, symbol_addr=0, symbol_len=1, mapping=[mapping])

    def GetBuildIdForPath(self, path: str) -> str:
        """ Always returns an empty string. """
        return ''

    def GetRecordCmd(self) -> str:
        """ Always returns an empty string. """
        return ''

    def GetArch(self) -> str:
        """ Always returns an empty string. """
        return ''

    def MetaInfo(self) -> Dict[str, str]:
        """ Always returns an empty dict. """
        return {}
936
937
def GetReportLib(record_file: str) -> Union[ReportLib, ProtoFileReportLib]:
    """ Create a report lib for record_file and load the file into it.

        A ProtoFileReportLib is created when ProtoFileReportLib.is_supported_format() accepts
        the file, otherwise a ReportLib. SetRecordFile(record_file) is called on the new
        object before it is returned.
    """
    use_proto_lib = ProtoFileReportLib.is_supported_format(record_file)
    lib_class = ProtoFileReportLib if use_proto_lib else ReportLib
    report_lib = lib_class()
    report_lib.SetRecordFile(record_file)
    return report_lib
945