#!/usr/bin/env python3
#
# Copyright (C) 2016 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""simpleperf_report_lib.py: a python wrapper of libsimpleperf_report.so.
   Used to access samples in perf.data.

"""

import collections
import ctypes as ct
from pathlib import Path
import struct
from typing import Any, Dict, List, Optional, Tuple, Union

from simpleperf_utils import bytes_to_str, get_host_binary_path, is_windows, str_to_bytes


def _is_null(p: Optional[ct._Pointer]) -> bool:
    """ Return True if p is None or a NULL ctypes pointer. """
    if p:
        return False
    return ct.cast(p, ct.c_void_p).value is None


def _char_pt(s: str) -> bytes:
    """ Convert a python string to bytes, to be passed as a char* argument. """
    return str_to_bytes(s)


def _char_pt_to_str(char_pt: ct.c_char_p) -> str:
    """ Convert the value of a char* field or return value to a python string. """
    return bytes_to_str(char_pt)


def _check(cond: bool, failmsg: str):
    """ Raise RuntimeError(failmsg) if cond is false. """
    if not cond:
        raise RuntimeError(failmsg)


def _truncate_at_nul(raw: bytes) -> bytes:
    """ Return the part of raw before the first NUL byte (all of raw if there is none). """
    return raw.partition(b'\0')[0]


def _read_uint32(address: ct.c_void_p) -> int:
    """ Read a native-endian uint32 from the memory address held in address. """
    return ct.cast(address, ct.POINTER(ct.c_uint32)).contents.value


class SampleStruct(ct.Structure):
    """ Instance of a sample in perf.data.
        ip: the program counter of the thread generating the sample.
        pid: process id (or thread group id) of the thread generating the sample.
        tid: thread id.
        thread_comm: thread name.
        time: time at which the sample was generated. The value is in nanoseconds.
              The clock is decided by the --clockid option in `simpleperf record`.
        in_kernel: whether the instruction is in kernel space or user space.
        cpu: the cpu generating the sample.
        period: count of events that have happened since last sample. For example, if we use
             -e cpu-cycles, it means how many cpu-cycles have happened.
             If we use -e cpu-clock, it means how many nanoseconds have passed.
    """
    _fields_ = [('ip', ct.c_uint64),
                ('pid', ct.c_uint32),
                ('tid', ct.c_uint32),
                ('_thread_comm', ct.c_char_p),
                ('time', ct.c_uint64),
                ('in_kernel', ct.c_uint32),
                ('cpu', ct.c_uint32),
                ('period', ct.c_uint64)]

    @property
    def thread_comm(self) -> str:
        return _char_pt_to_str(self._thread_comm)


class TracingFieldFormatStruct(ct.Structure):
    """Format of a tracing field.
       name: name of the field.
       offset: offset of the field in tracing data.
       elem_size: size of the element type.
       elem_count: the number of elements in this field, more than one if the field is an array.
       is_signed: whether the element type is signed or unsigned.
       is_dynamic: whether the element is a dynamic string.
    """
    _fields_ = [('_name', ct.c_char_p),
                ('offset', ct.c_uint32),
                ('elem_size', ct.c_uint32),
                ('elem_count', ct.c_uint32),
                ('is_signed', ct.c_uint32),
                ('is_dynamic', ct.c_uint32)]

    # Maps element size in bytes to the (signed) struct format character.
    _unpack_key_dict = {1: 'b', 2: 'h', 4: 'i', 8: 'q'}

    @property
    def name(self) -> str:
        return _char_pt_to_str(self._name)

    def parse_value(self, data: Union[bytes, ct._Pointer]
                    ) -> Union[str, int, Tuple[int, ...], bytes, List[bytes]]:
        """ Parse value of a field in a tracepoint event.
            The return value depends on the type of the field, and can be an int value, a string,
            an array of int values, etc. If the type can't be parsed, return a byte array or an
            array of byte arrays.

            data: the raw tracing data of the sample. Any object whose slices are bytes works,
                  e.g. a POINTER(c_char) returned by the native lib, or a bytes object.
        """
        if self.is_dynamic:
            # The field itself stores two little-endian uint16: the offset of the string in
            # the tracing data, and the maximum length of the string.
            offset, max_len = struct.unpack('<HH', data[self.offset:self.offset + 4])
            # Slice once and decode once, so that multi-byte encoded characters reach
            # bytes_to_str intact instead of being decoded byte by byte.
            return bytes_to_str(_truncate_at_nul(data[offset:offset + max_len]))

        if self.elem_count > 1 and self.elem_size == 1:
            # Probably the field is a string.
            # Don't use self.is_signed, which has different values on x86 and arm.
            raw = data[self.offset:self.offset + self.elem_count]
            return bytes_to_str(_truncate_at_nul(raw))

        unpack_key = self._unpack_key_dict.get(self.elem_size)
        if unpack_key:
            if not self.is_signed:
                # Upper case format characters are the unsigned variants.
                unpack_key = unpack_key.upper()
            value = struct.unpack('%d%s' % (self.elem_count, unpack_key),
                                  data[self.offset:self.offset + self.elem_count * self.elem_size])
        else:
            # Since we don't know the element type, just return the bytes.
            value = [data[start:start + self.elem_size]
                     for start in range(self.offset,
                                        self.offset + self.elem_count * self.elem_size,
                                        self.elem_size)] if self.elem_size else \
                [data[self.offset:self.offset] for _ in range(self.elem_count)]
        if self.elem_count == 1:
            value = value[0]
        return value


class TracingDataFormatStruct(ct.Structure):
    """Format of tracing data of a tracepoint event, like
       https://www.kernel.org/doc/html/latest/trace/events.html#event-formats.
       size: total size of all fields in the tracing data.
       field_count: the number of fields.
       fields: an array of fields.
    """
    _fields_ = [('size', ct.c_uint32),
                ('field_count', ct.c_uint32),
                ('fields', ct.POINTER(TracingFieldFormatStruct))]


class EventStruct(ct.Structure):
    """Event type of a sample.
       name: name of the event type.
       tracing_data_format: only available when it is a tracepoint event.
    """
    _fields_ = [('_name', ct.c_char_p),
                ('tracing_data_format', TracingDataFormatStruct)]

    @property
    def name(self) -> str:
        return _char_pt_to_str(self._name)


class MappingStruct(ct.Structure):
    """ A mapping area in the monitored threads, like the content in /proc/<pid>/maps.
        start: start addr in memory.
        end: end addr in memory.
        pgoff: offset in the mapped shared library.
    """
    _fields_ = [('start', ct.c_uint64),
                ('end', ct.c_uint64),
                ('pgoff', ct.c_uint64)]


class SymbolStruct(ct.Structure):
    """ Symbol info of the instruction hit by a sample or a callchain entry of a sample.
        dso_name: path of the shared library containing the instruction.
        vaddr_in_file: virtual address of the instruction in the shared library.
        symbol_name: name of the function containing the instruction.
        symbol_addr: start addr of the function containing the instruction.
        symbol_len: length of the function in the shared library.
        mapping: the mapping area hit by the instruction.
    """
    _fields_ = [('_dso_name', ct.c_char_p),
                ('vaddr_in_file', ct.c_uint64),
                ('_symbol_name', ct.c_char_p),
                ('symbol_addr', ct.c_uint64),
                ('symbol_len', ct.c_uint64),
                ('mapping', ct.POINTER(MappingStruct))]

    @property
    def dso_name(self) -> str:
        return _char_pt_to_str(self._dso_name)

    @property
    def symbol_name(self) -> str:
        return _char_pt_to_str(self._symbol_name)


class CallChainEntryStructure(ct.Structure):
    """ A callchain entry of a sample.
        ip: the address of the instruction of the callchain entry.
        symbol: symbol info of the callchain entry.
    """
    _fields_ = [('ip', ct.c_uint64),
                ('symbol', SymbolStruct)]


class CallChainStructure(ct.Structure):
    """ Callchain info of a sample.
        nr: number of entries in the callchain.
        entries: a pointer to an array of CallChainEntryStructure.

        For example, if a sample is generated when a thread is running function C
        with callchain function A -> function B -> function C.
        Then nr = 2, and entries = [function B, function A].
    """
    _fields_ = [('nr', ct.c_uint32),
                ('entries', ct.POINTER(CallChainEntryStructure))]


class FeatureSectionStructure(ct.Structure):
    """ A feature section in perf.data to store information like record cmd, device arch, etc.
        data: a pointer to a buffer storing the section data.
        data_size: data size in bytes.
    """
    _fields_ = [('data', ct.POINTER(ct.c_char)),
                ('data_size', ct.c_uint32)]


class ReportLibStructure(ct.Structure):
    """ Opaque handle type for the instance created by the native lib. """
    _fields_ = []


# pylint: disable=invalid-name
class ReportLib(object):
    """ Python interface of the native report lib.
        It loads the native lib, creates a native instance, and forwards calls to it.
        Call Close() to destroy the native instance when it is no longer needed.
    """

    def __init__(self, native_lib_path: Optional[str] = None):
        """ Load the native lib and create a native instance.
            native_lib_path: path of the native lib. If None, use the path returned by
                             get_host_binary_path('libsimpleperf_report.so').
        """
        if native_lib_path is None:
            native_lib_path = self._get_native_lib()

        self._load_dependent_lib()
        self._lib = ct.CDLL(native_lib_path)
        self._CreateReportLibFunc = self._lib.CreateReportLib
        self._CreateReportLibFunc.restype = ct.POINTER(ReportLibStructure)
        self._DestroyReportLibFunc = self._lib.DestroyReportLib
        # No restype is set for the Set* functions below, so ctypes uses its default
        # result type (C int) for them.
        # NOTE(review): AddProguardMappingFile declares ct.c_bool; if these functions also
        # return a C++ bool, they should declare it too - confirm against the native header.
        self._SetLogSeverityFunc = self._lib.SetLogSeverity
        self._SetSymfsFunc = self._lib.SetSymfs
        self._SetRecordFileFunc = self._lib.SetRecordFile
        self._SetKallsymsFileFunc = self._lib.SetKallsymsFile
        self._ShowIpForUnknownSymbolFunc = self._lib.ShowIpForUnknownSymbol
        self._ShowArtFramesFunc = self._lib.ShowArtFrames
        self._MergeJavaMethodsFunc = self._lib.MergeJavaMethods
        self._AddProguardMappingFileFunc = self._lib.AddProguardMappingFile
        self._AddProguardMappingFileFunc.restype = ct.c_bool
        self._GetNextSampleFunc = self._lib.GetNextSample
        self._GetNextSampleFunc.restype = ct.POINTER(SampleStruct)
        self._GetEventOfCurrentSampleFunc = self._lib.GetEventOfCurrentSample
        self._GetEventOfCurrentSampleFunc.restype = ct.POINTER(EventStruct)
        self._GetSymbolOfCurrentSampleFunc = self._lib.GetSymbolOfCurrentSample
        self._GetSymbolOfCurrentSampleFunc.restype = ct.POINTER(SymbolStruct)
        self._GetCallChainOfCurrentSampleFunc = self._lib.GetCallChainOfCurrentSample
        self._GetCallChainOfCurrentSampleFunc.restype = ct.POINTER(CallChainStructure)
        self._GetTracingDataOfCurrentSampleFunc = self._lib.GetTracingDataOfCurrentSample
        self._GetTracingDataOfCurrentSampleFunc.restype = ct.POINTER(ct.c_char)
        self._GetBuildIdForPathFunc = self._lib.GetBuildIdForPath
        self._GetBuildIdForPathFunc.restype = ct.c_char_p
        self._GetFeatureSection = self._lib.GetFeatureSection
        self._GetFeatureSection.restype = ct.POINTER(FeatureSectionStructure)
        self._instance = self._CreateReportLibFunc()
        assert not _is_null(self._instance)

        # Caches filled lazily by MetaInfo(), GetNextSample() and GetRecordCmd().
        self.meta_info: Optional[Dict[str, str]] = None
        self.current_sample: Optional[SampleStruct] = None
        self.record_cmd: Optional[str] = None

    def _get_native_lib(self) -> str:
        """ Return the default path of the native lib. """
        return get_host_binary_path('libsimpleperf_report.so')

    def _load_dependent_lib(self):
        """ Load libraries which must be loaded before the native lib. """
        # As the windows dll is built with mingw we need to load 'libwinpthread-1.dll'.
        if is_windows():
            self._libwinpthread = ct.CDLL(get_host_binary_path('libwinpthread-1.dll'))

    def Close(self):
        """ Destroy the native instance. Calling it again is a no-op. """
        if self._instance:
            self._DestroyReportLibFunc(self._instance)
            self._instance = None

    def SetLogSeverity(self, log_level: str = 'info'):
        """ Set log severity of native lib, can be verbose,debug,info,error,fatal."""
        cond: bool = self._SetLogSeverityFunc(self.getInstance(), _char_pt(log_level))
        _check(cond, 'Failed to set log level')

    def SetSymfs(self, symfs_dir: str):
        """ Set directory used to find symbols."""
        cond: bool = self._SetSymfsFunc(self.getInstance(), _char_pt(symfs_dir))
        _check(cond, 'Failed to set symbols directory')

    def SetRecordFile(self, record_file: str):
        """ Set the path of record file, like perf.data."""
        cond: bool = self._SetRecordFileFunc(self.getInstance(), _char_pt(record_file))
        _check(cond, 'Failed to set record file')

    def ShowIpForUnknownSymbol(self):
        """ Forward the ShowIpForUnknownSymbol option to the native lib. """
        self._ShowIpForUnknownSymbolFunc(self.getInstance())

    def ShowArtFrames(self, show: bool = True):
        """ Show frames of internal methods of the Java interpreter. """
        self._ShowArtFramesFunc(self.getInstance(), show)

    def MergeJavaMethods(self, merge: bool = True):
        """ This option merges jitted java methods with the same name but in different jit
            symfiles. If possible, it also merges jitted methods with interpreted methods,
            by mapping jitted methods to their corresponding dex files.
            Side effects:
              It only works at method level, not instruction level.
              It makes symbol.vaddr_in_file and symbol.mapping not accurate for jitted methods.
            Java methods are merged by default.
        """
        self._MergeJavaMethodsFunc(self.getInstance(), merge)

    def AddProguardMappingFile(self, mapping_file: Union[str, Path]):
        """ Add proguard mapping.txt to de-obfuscate method names.
            Raise ValueError if the native lib fails to add the file.
        """
        if not self._AddProguardMappingFileFunc(self.getInstance(), _char_pt(str(mapping_file))):
            raise ValueError(f'failed to add proguard mapping file: {mapping_file}')

    def SetKallsymsFile(self, kallsym_file: str):
        """ Set the file path to a copy of the /proc/kallsyms file (for off device decoding) """
        cond: bool = self._SetKallsymsFileFunc(self.getInstance(), _char_pt(kallsym_file))
        _check(cond, 'Failed to set kallsyms file')

    def GetNextSample(self) -> Optional[SampleStruct]:
        """ Return the next sample. If no more samples, return None. """
        psample = self._GetNextSampleFunc(self.getInstance())
        if _is_null(psample):
            self.current_sample = None
        else:
            self.current_sample = psample[0]
        return self.current_sample

    def GetCurrentSample(self) -> Optional[SampleStruct]:
        """ Return the sample returned by the last GetNextSample() call. """
        return self.current_sample

    def GetEventOfCurrentSample(self) -> EventStruct:
        """ Return the event type of the current sample. """
        event = self._GetEventOfCurrentSampleFunc(self.getInstance())
        assert not _is_null(event)
        return event[0]

    def GetSymbolOfCurrentSample(self) -> SymbolStruct:
        """ Return the symbol info of the current sample. """
        symbol = self._GetSymbolOfCurrentSampleFunc(self.getInstance())
        assert not _is_null(symbol)
        return symbol[0]

    def GetCallChainOfCurrentSample(self) -> CallChainStructure:
        """ Return the callchain info of the current sample. """
        callchain = self._GetCallChainOfCurrentSampleFunc(self.getInstance())
        assert not _is_null(callchain)
        return callchain[0]

    def GetTracingDataOfCurrentSample(self) -> Optional[Dict[str, Any]]:
        """ Return the tracing data of the current sample as an ordered map from field name
            to parsed field value. Return None if the native lib has no tracing data for it.
        """
        data = self._GetTracingDataOfCurrentSampleFunc(self.getInstance())
        if _is_null(data):
            return None
        data_format = self.GetEventOfCurrentSample().tracing_data_format
        result = collections.OrderedDict()
        for i in range(data_format.field_count):
            field = data_format.fields[i]
            result[field.name] = field.parse_value(data)
        return result

    def GetBuildIdForPath(self, path: str) -> str:
        """ Return the build id string the native lib reports for path. """
        build_id = self._GetBuildIdForPathFunc(self.getInstance(), _char_pt(path))
        assert not _is_null(build_id)
        return _char_pt_to_str(build_id)

    def GetRecordCmd(self) -> str:
        """ Return the record command line stored in the 'cmdline' feature section.
            Arguments are joined by spaces, and arguments containing spaces are wrapped in
            double quotes. Return '' if the section doesn't exist. The result is cached.
        """
        if self.record_cmd is not None:
            return self.record_cmd
        self.record_cmd = ''
        feature_data = self._GetFeatureSection(self.getInstance(), _char_pt('cmdline'))
        if not _is_null(feature_data):
            # Section layout: uint32 arg_count, then for each arg: uint32 len, char[len].
            void_p = ct.cast(feature_data[0].data, ct.c_void_p)
            arg_count = _read_uint32(void_p)
            void_p.value += 4
            args = []
            for _ in range(arg_count):
                str_len = _read_uint32(void_p)
                void_p.value += 4
                raw = ct.cast(void_p, ct.POINTER(ct.c_char))[:str_len]
                # Drop NUL bytes, then decode the whole argument in one call so that
                # multi-byte encoded characters are not split before decoding.
                arg = bytes_to_str(raw.replace(b'\0', b''))
                if ' ' in arg:
                    arg = '"' + arg + '"'
                args.append(arg)
                void_p.value += str_len
            self.record_cmd = ' '.join(args)
        return self.record_cmd

    def _GetFeatureString(self, feature_name: str) -> str:
        """ Return the string stored in a feature section laid out as uint32 len, char[len].
            The string ends at the first NUL byte, if any. Return '' if the section
            doesn't exist.
        """
        feature_data = self._GetFeatureSection(self.getInstance(), _char_pt(feature_name))
        if _is_null(feature_data):
            return ''
        void_p = ct.cast(feature_data[0].data, ct.c_void_p)
        str_len = _read_uint32(void_p)
        void_p.value += 4
        raw = ct.cast(void_p, ct.POINTER(ct.c_char))[:str_len]
        # Decode once instead of byte by byte, see GetRecordCmd().
        return bytes_to_str(_truncate_at_nul(raw))

    def GetArch(self) -> str:
        """ Return the string stored in the 'arch' feature section. """
        return self._GetFeatureString('arch')

    def MetaInfo(self) -> Dict[str, str]:
        """ Return a string to string map stored in meta_info section in perf.data.
            It is used to pass some short meta information.
        """
        if self.meta_info is None:
            self.meta_info = {}
            feature_data = self._GetFeatureSection(self.getInstance(), _char_pt('meta_info'))
            if not _is_null(feature_data):
                section = feature_data[0]
                raw = section.data[:section.data_size]
                # The section is a sequence of NUL terminated strings. The last element of
                # split() is whatever follows the final NUL (not a terminated string), so
                # it is dropped.
                str_list = [bytes_to_str(item) for item in raw.split(b'\0')[:-1]]
                # Strings alternate between key and value. zip() ignores a trailing key
                # without a value instead of failing with IndexError.
                for key, value in zip(str_list[0::2], str_list[1::2]):
                    self.meta_info[key] = value
        return self.meta_info

    def getInstance(self) -> ct._Pointer:
        """ Return the native instance. Raise an exception if Close() has been called. """
        if self._instance is None:
            raise Exception('Instance is Closed')
        return self._instance