#!/usr/bin/env python3
#
# Copyright (C) 2021 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""gecko_profile_generator.py: converts perf.data to Gecko Profile Format,
    which can be read by https://profiler.firefox.com/.

  Example:
    ./app_profiler.py
    ./gecko_profile_generator.py | gzip > gecko-profile.json.gz

  Then open gecko-profile.json.gz in https://profiler.firefox.com/
"""

from collections import Counter
from dataclasses import dataclass, field
import json
import logging
import sys
from typing import List, Dict, Optional, NamedTuple, Tuple

from simpleperf_report_lib import ReportLib
from simpleperf_utils import BaseArgumentParser, ReportLibOptions


StringID = int
StackID = int
FrameID = int
CategoryID = int
Milliseconds = float
GeckoProfile = Dict


# https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L156
class Frame(NamedTuple):
    """One row of a thread's frameTable.

    The field order matches the "frameTable" schema emitted by
    Thread.to_json_dict(); json serializes a NamedTuple as a plain list.
    """
    string_id: StringID
    relevantForJS: bool
    innerWindowID: int
    implementation: None
    optimizations: None
    line: None
    column: None
    category: CategoryID
    subcategory: int


# https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L216
class Stack(NamedTuple):
    """One row of a thread's stackTable: a leaf frame plus the stack below it.

    The field order matches the "stackTable" schema emitted by
    Thread.to_json_dict().
    """
    prefix_id: Optional[StackID]
    frame_id: FrameID
    category_id: CategoryID


# https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L90
class Sample(NamedTuple):
    """A single timestamped sample of a thread."""
    stack_id: Optional[StackID]
    time_ms: Milliseconds
    responsiveness: int
    complete_stack: bool

    def to_json(self):
        """Returns the row written to the profile.

        complete_stack is bookkeeping for gap removal only and is left out.
        """
        return [self.stack_id, self.time_ms, self.responsiveness]


# Schema: https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/profile.js#L425
# Colors must be defined in:
# https://github.com/firefox-devtools/profiler/blob/50124adbfa488adba6e2674a8f2618cf34b59cd2/res/css/categories.css
#
# Frame.category values assigned in Thread._intern_frame() are indexes into
# this list, so the order of the entries matters.
CATEGORIES = [
    {
        "name": 'User',
        # Follow Brendan Gregg's Flamegraph convention: yellow for userland
        # https://github.com/brendangregg/FlameGraph/blob/810687f180f3c4929b5d965f54817a5218c9d89b/flamegraph.pl#L419
        "color": 'yellow',
        "subcategories": ['Other']
    },
    {
        "name": 'Kernel',
        # Follow Brendan Gregg's Flamegraph convention: orange for kernel
        # https://github.com/brendangregg/FlameGraph/blob/810687f180f3c4929b5d965f54817a5218c9d89b/flamegraph.pl#L417
        "color": 'orange',
        "subcategories": ['Other']
    },
    {
        "name": 'Native',
        # Follow Brendan Gregg's Flamegraph convention: yellow for userland
        # https://github.com/brendangregg/FlameGraph/blob/810687f180f3c4929b5d965f54817a5218c9d89b/flamegraph.pl#L419
        "color": 'yellow',
        "subcategories": ['Other']
    },
    {
        "name": 'DEX',
        # Follow Brendan Gregg's Flamegraph convention: green for Java/JIT
        # https://github.com/brendangregg/FlameGraph/blob/810687f180f3c4929b5d965f54817a5218c9d89b/flamegraph.pl#L411
        "color": 'green',
        "subcategories": ['Other']
    },
    {
        "name": 'OAT',
        # Follow Brendan Gregg's Flamegraph convention: green for Java/JIT
        # https://github.com/brendangregg/FlameGraph/blob/810687f180f3c4929b5d965f54817a5218c9d89b/flamegraph.pl#L411
        "color": 'green',
        "subcategories": ['Other']
    },
    {
        "name": 'Off-CPU',
        # Follow Brendan Gregg's Flamegraph convention: blue for off-CPU
        # https://github.com/brendangregg/FlameGraph/blob/810687f180f3c4929b5d965f54817a5218c9d89b/flamegraph.pl#L470
        "color": 'blue',
        "subcategories": ['Other']
    },
    # Not used by this exporter yet, but some Firefox Profiler code assumes
    # there is an 'Other' category by searching for a category with
    # color=grey, so include this.
    {
        "name": 'Other',
        "color": 'grey',
        "subcategories": ['Other']
    },
]


def is_complete_stack(stack: List[str]) -> bool:
    """ Check if the callstack is complete. The stack starts from root.

    A stack counts as complete when any of its entries mentions
    '__libc_init' or '__start_thread'.
    """
    return any(('__libc_init' in entry) or ('__start_thread' in entry) for entry in stack)


@dataclass
class Thread:
    """A builder for a profile of a single thread.

    Attributes:
        comm: Thread command-line (name).
        pid: process ID of containing process.
        tid: thread ID.
        samples: Timeline of profile samples.
        frameTable: interned stack frame ID -> stack frame.
        stringTable: interned string ID -> string.
        stringMap: interned string -> string ID.
        stackTable: interned stack ID -> stack.
        stackMap: (stack prefix ID, leaf stack frame ID) -> interned Stack ID.
        frameMap: Stack Frame string -> interned Frame ID.
    """
    comm: str
    pid: int
    tid: int
    samples: List[Sample] = field(default_factory=list)
    frameTable: List[Frame] = field(default_factory=list)
    stringTable: List[str] = field(default_factory=list)
    # TODO: this is redundant with frameTable, could we remove this?
    stringMap: Dict[str, int] = field(default_factory=dict)
    stackTable: List[Stack] = field(default_factory=list)
    stackMap: Dict[Tuple[Optional[int], int], int] = field(default_factory=dict)
    frameMap: Dict[str, int] = field(default_factory=dict)

    def _intern_stack(self, frame_id: int, prefix_id: Optional[int]) -> int:
        """Gets a matching stack, or saves the new stack. Returns a Stack ID."""
        key = (prefix_id, frame_id)
        stack_id = self.stackMap.get(key)
        if stack_id is not None:
            return stack_id
        # IDs are positions in stackTable, so the next ID is the current length.
        stack_id = len(self.stackTable)
        self.stackTable.append(Stack(prefix_id=prefix_id,
                                     frame_id=frame_id,
                                     category_id=0))
        self.stackMap[key] = stack_id
        return stack_id

    def _intern_string(self, string: str) -> int:
        """Gets a matching string, or saves the new string. Returns a String ID."""
        string_id = self.stringMap.get(string)
        if string_id is not None:
            return string_id
        string_id = len(self.stringTable)
        self.stringTable.append(string)
        self.stringMap[string] = string_id
        return string_id

    def _intern_frame(self, frame_str: str) -> int:
        """Gets a matching stack frame, or saves the new frame. Returns a Frame ID.

        The frame category (an index into CATEGORIES) is guessed from
        substrings of frame_str.
        """
        frame_id = self.frameMap.get(frame_str)
        if frame_id is not None:
            return frame_id
        frame_id = len(self.frameTable)
        self.frameMap[frame_str] = frame_id
        string_id = self._intern_string(frame_str)

        category = 0  # User
        # Heuristic: kernel code contains "kallsyms" as the library name.
        if "kallsyms" in frame_str or ".ko" in frame_str:
            category = 1  # Kernel
            # Heuristic: empirically, off-CPU profiles mostly measure off-CPU time
            # accounted to the linux kernel __schedule function, which handles
            # blocking. This only works if we have kernel symbol (kallsyms)
            # access though.
            # https://cs.android.com/android/kernel/superproject/+/common-android-mainline:common/kernel/sched/core.c;l=6593;drc=0c99414a07ddaa18d8eb4be90b551d2687cbde2f
            if frame_str.startswith("__schedule "):
                category = 5  # Off-CPU
        elif ".so" in frame_str:
            category = 2  # Native
        elif ".vdex" in frame_str:
            category = 3  # DEX
        elif ".oat" in frame_str:
            category = 4  # OAT

        self.frameTable.append(Frame(
            string_id=string_id,
            relevantForJS=False,
            innerWindowID=0,
            implementation=None,
            optimizations=None,
            line=None,
            column=None,
            category=category,
            subcategory=0,
        ))
        return frame_id

    def add_sample(self, comm: str, stack: List[str], time_ms: Milliseconds) -> None:
        """Add a timestamped stack trace sample to the thread builder.

        Args:
            comm: command-line (name) of the thread at this sample
            stack: sampled stack frames. Root first, leaf last.
            time_ms: timestamp of sample in milliseconds
        """
        # Unix threads often don't set their name immediately upon creation.
        # Use the last name
        self.comm = comm

        # Walk from the root, interning each (prefix stack, frame) pair. An
        # empty stack leaves the sample's stack ID as None.
        prefix_stack_id = None
        for frame in stack:
            frame_id = self._intern_frame(frame)
            prefix_stack_id = self._intern_stack(frame_id, prefix_stack_id)

        self.samples.append(Sample(stack_id=prefix_stack_id,
                                   time_ms=time_ms,
                                   responsiveness=0,
                                   complete_stack=is_complete_stack(stack)))

    def sort_samples(self) -> None:
        """ The samples aren't guaranteed to be in order. Sort them by time. """
        self.samples.sort(key=lambda s: s.time_ms)

    def remove_stack_gaps(self, max_remove_gap_length: int, gap_distr: Dict[int, int]) -> None:
        """ Ideally all callstacks are complete. But some may be broken for different reasons.
            To create a smooth view in "Stack Chart", remove small gaps of broken callstacks.

        Args:
            max_remove_gap_length: the max length of continuous broken-stack samples to remove.
                Values <= 0 disable gap removal.
            gap_distr: updated in place; maps gap length -> number of gaps of that length
                found in this thread. Any mapping works (dict or Counter).
        """
        if max_remove_gap_length <= 0:
            return
        sample_count = len(self.samples)
        kept: List[Sample] = []
        i = 0
        while i < sample_count:
            if self.samples[i].complete_stack:
                kept.append(self.samples[i])
                i += 1
                continue
            # Find the end of this run of consecutive broken-stack samples.
            gap_end = i + 1
            while gap_end < sample_count and not self.samples[gap_end].complete_stack:
                gap_end += 1
            gap_length = gap_end - i
            # .get() keeps this working for a plain dict as well as a Counter.
            gap_distr[gap_length] = gap_distr.get(gap_length, 0) + 1
            if gap_length > max_remove_gap_length:
                # Gap is too long to hide; keep its samples.
                kept.extend(self.samples[i:gap_end])
            i = gap_end
        if len(kept) != sample_count:
            self.samples = kept

    def to_json_dict(self) -> Dict:
        """Converts this Thread to GeckoThread JSON format."""

        # Gecko profile format is row-oriented data as List[List],
        # And a schema for interpreting each index.
        # Schema:
        # https://github.com/firefox-devtools/profiler/blob/main/docs-developer/gecko-profile-format.md
        # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L230
        return {
            "tid": self.tid,
            "pid": self.pid,
            "name": self.comm,
            # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L51
            "markers": {
                "schema": {
                    "name": 0,
                    "startTime": 1,
                    "endTime": 2,
                    "phase": 3,
                    "category": 4,
                    "data": 5,
                },
                "data": [],
            },
            # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L90
            "samples": {
                "schema": {
                    "stack": 0,
                    "time": 1,
                    "responsiveness": 2,
                },
                "data": [s.to_json() for s in self.samples],
            },
            # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L156
            "frameTable": {
                "schema": {
                    "location": 0,
                    "relevantForJS": 1,
                    "innerWindowID": 2,
                    "implementation": 3,
                    "optimizations": 4,
                    "line": 5,
                    "column": 6,
                    "category": 7,
                    "subcategory": 8,
                },
                "data": self.frameTable,
            },
            # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L216
            "stackTable": {
                "schema": {
                    "prefix": 0,
                    "frame": 1,
                    "category": 2,
                },
                "data": self.stackTable,
            },
            "stringTable": self.stringTable,
            "registerTime": 0,
            "unregisterTime": None,
            "processType": "default",
        }


def remove_stack_gaps(max_remove_gap_length: int, thread_map: Dict[int, Thread]) -> None:
    """ Remove stack gaps for each thread, and print status.

    Threads left with no samples are deleted from thread_map.

    Args:
        max_remove_gap_length: the max length of continuous broken-stack samples to remove.
            Values <= 0 disable gap removal.
        thread_map: tid -> Thread; modified in place.
    """
    if max_remove_gap_length <= 0:
        return
    total_sample_count = 0
    remove_sample_count = 0
    gap_distr = Counter()
    # Iterate over a snapshot of the keys because entries may be deleted below.
    for tid in list(thread_map):
        thread = thread_map[tid]
        old_n = len(thread.samples)
        thread.remove_stack_gaps(max_remove_gap_length, gap_distr)
        new_n = len(thread.samples)
        total_sample_count += old_n
        remove_sample_count += old_n - new_n
        if new_n == 0:
            del thread_map[tid]
    if total_sample_count != 0:
        logging.info('Remove stack gaps with length <= %d. %d (%.2f%%) samples are removed.',
                     max_remove_gap_length, remove_sample_count,
                     remove_sample_count / total_sample_count * 100
                     )
        logging.debug('Stack gap length distribution among samples (gap_length: count): %s',
                      gap_distr)


def _frame_label(symbol) -> str:
    """Formats a symbol as the '<symbol name> (in <dso name>)' frame string."""
    return f'{symbol.symbol_name} (in {symbol.dso_name})'


def _gecko_profile(
        record_file: str,
        symfs_dir: Optional[str],
        kallsyms_file: Optional[str],
        report_lib_options: ReportLibOptions,
        max_remove_gap_length: int) -> GeckoProfile:
    """convert a simpleperf profile to gecko format

    Args:
        record_file: path of the recorded profile to read.
        symfs_dir: passed to ReportLib.SetSymfs() when not None.
        kallsyms_file: passed to ReportLib.SetKallsymsFile() when not None.
        report_lib_options: passed to ReportLib.SetReportOptions().
        max_remove_gap_length: see remove_stack_gaps().

    Returns:
        The profile as a JSON-serializable dict.
    """
    lib = ReportLib()
    # Map from tid to Thread
    thread_map: Dict[int, Thread] = {}

    # Close the report lib even if reading the profile fails part-way.
    try:
        lib.ShowIpForUnknownSymbol()
        if symfs_dir is not None:
            lib.SetSymfs(symfs_dir)
        lib.SetRecordFile(record_file)
        if kallsyms_file is not None:
            lib.SetKallsymsFile(kallsyms_file)
        lib.SetReportOptions(report_lib_options)

        arch = lib.GetArch()
        meta_info = lib.MetaInfo()
        record_cmd = lib.GetRecordCmd()

        while True:
            sample = lib.GetNextSample()
            if sample is None:
                break
            symbol = lib.GetSymbolOfCurrentSample()
            callchain = lib.GetCallChainOfCurrentSample()
            # NOTE(review): assumes sample.time is in nanoseconds - confirm.
            sample_time_ms = sample.time / 1000000

            stack = [_frame_label(symbol)]
            for i in range(callchain.nr):
                stack.append(_frame_label(callchain.entries[i].symbol))
            # We want root first, leaf last.
            stack.reverse()

            # add thread sample
            thread = thread_map.get(sample.tid)
            if thread is None:
                thread = Thread(comm=sample.thread_comm, pid=sample.pid, tid=sample.tid)
                thread_map[sample.tid] = thread
            thread.add_sample(
                comm=sample.thread_comm,
                stack=stack,
                # We are being a bit fast and loose here with time here.  simpleperf
                # uses CLOCK_MONOTONIC by default, which doesn't use the normal unix
                # epoch, but rather some arbitrary time. In practice, this doesn't
                # matter, the Firefox Profiler normalises all the timestamps to begin at
                # the minimum time.  Consider fixing this in future, if needed, by
                # setting `simpleperf record --clockid realtime`.
                time_ms=sample_time_ms)
    finally:
        lib.Close()

    for thread in thread_map.values():
        thread.sort_samples()

    remove_stack_gaps(max_remove_gap_length, thread_map)

    threads = [thread.to_json_dict() for thread in thread_map.values()]

    profile_timestamp = meta_info.get('timestamp')
    end_time_ms = (int(profile_timestamp) * 1000) if profile_timestamp else 0

    # Schema: https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L305
    gecko_profile_meta = {
        "interval": 1,
        "processType": 0,
        "product": record_cmd,
        "device": meta_info.get("product_props"),
        "platform": meta_info.get("android_build_fingerprint"),
        "stackwalk": 1,
        "debug": 0,
        "gcpoison": 0,
        "asyncstack": 1,
        # The profile timestamp is actually the end time, not the start time.
        # This is close enough for our purposes; I mostly just want to know which
        # day the profile was taken! Consider fixing this in future, if needed,
        # by setting `simpleperf record --clockid realtime` and taking the minimum
        # sample time.
        "startTime": end_time_ms,
        "shutdownTime": None,
        "version": 24,
        "presymbolicated": True,
        "categories": CATEGORIES,
        "markerSchema": [],
        "abi": arch,
        "oscpu": meta_info.get("android_build_fingerprint"),
    }

    # Schema:
    # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L377
    # https://github.com/firefox-devtools/profiler/blob/main/docs-developer/gecko-profile-format.md
    return {
        "meta": gecko_profile_meta,
        "libs": [],
        "threads": threads,
        "processes": [],
        "pausedRanges": [],
    }


def main() -> None:
    """Parses command-line arguments and writes the Gecko profile JSON to stdout."""
    parser = BaseArgumentParser(description=__doc__)
    parser.add_argument('--symfs',
                        help='Set the path to find binaries with symbols and debug info.')
    parser.add_argument('--kallsyms', help='Set the path to find kernel symbols.')
    parser.add_argument('-i', '--record_file', nargs='?', default='perf.data',
                        help='Default is perf.data.')
    parser.add_argument('--remove-gaps', metavar='MAX_GAP_LENGTH', dest='max_remove_gap_length',
                        type=int, default=3, help="""
                        Ideally all callstacks are complete. But some may be broken for different
                        reasons. To create a smooth view in "Stack Chart", remove small gaps of
                        broken callstacks. MAX_GAP_LENGTH is the max length of continuous
                        broken-stack samples we want to remove.
                        """
                        )
    parser.add_report_lib_options()
    args = parser.parse_args()
    profile = _gecko_profile(
        record_file=args.record_file,
        symfs_dir=args.symfs,
        kallsyms_file=args.kallsyms,
        report_lib_options=args.report_lib_options,
        max_remove_gap_length=args.max_remove_gap_length)

    json.dump(profile, sys.stdout, sort_keys=True)


if __name__ == '__main__':
    main()