• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2#
3# Copyright (C) 2021 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17
18"""gecko_profile_generator.py: converts perf.data to Gecko Profile Format,
19    which can be read by https://profiler.firefox.com/.
20
21  Example:
22    ./app_profiler.py
23    ./gecko_profile_generator.py | gzip > gecko-profile.json.gz
24
25  Then open gecko-profile.json.gz in https://profiler.firefox.com/
26"""
27
28from collections import Counter
29from dataclasses import dataclass, field
30import json
31import logging
32import sys
33from typing import List, Dict, Optional, NamedTuple, Tuple
34
35from simpleperf_report_lib import ReportLib
36from simpleperf_utils import BaseArgumentParser, ReportLibOptions
37
38
# Integer handles used by the interned tables of a Gecko profile thread.
StringID = int
FrameID = int
StackID = int
# Position of a category in the profile's category list.
CategoryID = int

# Sample timestamps are carried as floating-point milliseconds.
Milliseconds = float

# A whole Gecko profile, as a JSON-serializable dictionary.
GeckoProfile = Dict
45
46
47# https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L156
class Frame(NamedTuple):
    """One row of a thread's frame table.

    The field order is the serialized column order, so it must stay in step
    with the "frameTable" schema written by Thread.to_json_dict(); string_id
    fills that schema's "location" column.
    """
    # Interned index of the frame's display string.
    string_id: StringID
    relevantForJS: bool
    innerWindowID: int
    # The next four columns are typed as None: this exporter never fills them.
    implementation: None
    optimizations: None
    line: None
    column: None
    category: CategoryID
    subcategory: int
58
59
60# https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L216
class Stack(NamedTuple):
    """One row of a thread's stack table: a frame plus the stack it sits on.

    The field order is the serialized column order ("prefix", "frame",
    "category").
    """
    # Stack ID of the frames beneath this one; None for a stack's first frame.
    prefix_id: Optional[StackID]
    frame_id: FrameID
    category_id: CategoryID
65
66
67# https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L90
class Sample(NamedTuple):
    """A single timestamped stack sample of one thread."""
    stack_id: Optional[StackID]
    time_ms: Milliseconds
    responsiveness: int
    # Bookkeeping flag only; it is left out of the serialized row.
    complete_stack: bool

    def to_json(self):
        """Return the row as [stack, time, responsiveness], without complete_stack."""
        stack_id, time_ms, responsiveness, _ = self
        return [stack_id, time_ms, responsiveness]
76
77
78# Schema: https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/profile.js#L425
79# Colors must be defined in:
80# https://github.com/firefox-devtools/profiler/blob/50124adbfa488adba6e2674a8f2618cf34b59cd2/res/css/categories.css
CATEGORIES = [
    # Keep the order stable: Thread._intern_frame stores positions in this
    # list as frame categories.
    #
    # Colors follow Brendan Gregg's Flamegraph convention.
    # Yellow for userland:
    # https://github.com/brendangregg/FlameGraph/blob/810687f180f3c4929b5d965f54817a5218c9d89b/flamegraph.pl#L419
    {"name": "User", "color": "yellow", "subcategories": ["Other"]},
    # Orange for kernel:
    # https://github.com/brendangregg/FlameGraph/blob/810687f180f3c4929b5d965f54817a5218c9d89b/flamegraph.pl#L417
    {"name": "Kernel", "color": "orange", "subcategories": ["Other"]},
    # Yellow for userland:
    # https://github.com/brendangregg/FlameGraph/blob/810687f180f3c4929b5d965f54817a5218c9d89b/flamegraph.pl#L419
    {"name": "Native", "color": "yellow", "subcategories": ["Other"]},
    # Green for Java/JIT:
    # https://github.com/brendangregg/FlameGraph/blob/810687f180f3c4929b5d965f54817a5218c9d89b/flamegraph.pl#L411
    {"name": "DEX", "color": "green", "subcategories": ["Other"]},
    {"name": "OAT", "color": "green", "subcategories": ["Other"]},
    # Blue for off-CPU:
    # https://github.com/brendangregg/FlameGraph/blob/810687f180f3c4929b5d965f54817a5218c9d89b/flamegraph.pl#L470
    {"name": "Off-CPU", "color": "blue", "subcategories": ["Other"]},
    # Not used by this exporter yet, but some Firefox Profiler code assumes
    # there is an 'Other' category by searching for a category with
    # color=grey, so include this.
    {"name": "Other", "color": "grey", "subcategories": ["Other"]},
]
133
134
def is_complete_stack(stack: List[str]) -> bool:
    """Tell whether a callstack reaches a known thread entry point.

    The stack counts as complete when any of its frames contains
    '__libc_init' or '__start_thread'.
    """
    root_markers = ('__libc_init', '__start_thread')
    return any(marker in frame for frame in stack for marker in root_markers)
141
142
@dataclass
class Thread:
    """A builder for a profile of a single thread.

    Attributes:
      comm: Thread command-line (name).
      pid: process ID of containing process.
      tid: thread ID.
      samples: Timeline of profile samples.
      frameTable: interned stack frame ID -> stack frame.
      stringTable: interned string ID -> string.
      stringMap: interned string -> string ID.
      stackTable: interned stack ID -> stack.
      stackMap: (stack prefix ID, leaf stack frame ID) -> interned Stack ID.
      frameMap: Stack Frame string -> interned Frame ID.
    """
    comm: str
    pid: int
    tid: int
    samples: List[Sample] = field(default_factory=list)
    frameTable: List[Frame] = field(default_factory=list)
    stringTable: List[str] = field(default_factory=list)
    # TODO: this is redundant with frameTable, could we remove this?
    stringMap: Dict[str, int] = field(default_factory=dict)
    stackTable: List[Stack] = field(default_factory=list)
    stackMap: Dict[Tuple[Optional[int], int], int] = field(default_factory=dict)
    frameMap: Dict[str, int] = field(default_factory=dict)

    def _intern_stack(self, frame_id: int, prefix_id: Optional[int]) -> int:
        """Gets a matching stack, or saves the new stack. Returns a Stack ID.

        Args:
          frame_id: interned ID of the frame on top of this stack.
          prefix_id: interned ID of the stack beneath that frame, or None when
            the frame is the first one of the stack.
        """
        key = (prefix_id, frame_id)
        stack_id = self.stackMap.get(key)
        if stack_id is not None:
            return stack_id
        # IDs are positions in stackTable, so the next ID is its current length.
        stack_id = len(self.stackTable)
        self.stackTable.append(Stack(prefix_id=prefix_id,
                                     frame_id=frame_id,
                                     category_id=0))
        self.stackMap[key] = stack_id
        return stack_id

    def _intern_string(self, string: str) -> int:
        """Gets a matching string, or saves the new string. Returns a String ID."""
        string_id = self.stringMap.get(string)
        if string_id is not None:
            return string_id
        # IDs are positions in stringTable, so the next ID is its current length.
        string_id = len(self.stringTable)
        self.stringTable.append(string)
        self.stringMap[string] = string_id
        return string_id

    def _intern_frame(self, frame_str: str) -> int:
        """Gets a matching stack frame, or saves the new frame. Returns a Frame ID.

        A new frame gets its category from substring heuristics on frame_str.

        Args:
          frame_str: display string of the frame.
        """
        frame_id = self.frameMap.get(frame_str)
        if frame_id is not None:
            return frame_id
        frame_id = len(self.frameTable)
        self.frameMap[frame_str] = frame_id
        string_id = self._intern_string(frame_str)

        # Category values are positions in CATEGORIES; 0 ('User') is the fallback.
        category = 0
        # Heuristic: kernel code contains "kallsyms" as the library name.
        if "kallsyms" in frame_str or ".ko" in frame_str:
            category = 1  # Kernel
            # Heuristic: empirically, off-CPU profiles mostly measure off-CPU time
            # accounted to the linux kernel __schedule function, which handles
            # blocking. This only works if we have kernel symbol (kallsyms)
            # access though.
            # https://cs.android.com/android/kernel/superproject/+/common-android-mainline:common/kernel/sched/core.c;l=6593;drc=0c99414a07ddaa18d8eb4be90b551d2687cbde2f
            if frame_str.startswith("__schedule "):
                category = 5  # Off-CPU
        elif ".so" in frame_str:
            category = 2  # Native
        elif ".vdex" in frame_str:
            category = 3  # DEX
        elif ".oat" in frame_str:
            category = 4  # OAT

        self.frameTable.append(Frame(
            string_id=string_id,
            relevantForJS=False,
            innerWindowID=0,
            implementation=None,
            optimizations=None,
            line=None,
            column=None,
            category=category,
            subcategory=0,
        ))
        return frame_id

    def add_sample(self, comm: str, stack: List[str], time_ms: Milliseconds) -> None:
        """Add a timestamped stack trace sample to the thread builder.

        Args:
          comm: command-line (name) of the thread at this sample
          stack: sampled stack frames. Root first, leaf last.
          time_ms: timestamp of sample in milliseconds
        """
        # Unix threads often don't set their name immediately upon creation.
        # Use the last name
        self.comm = comm

        # Intern the stack root-first: each frame extends the stack of the
        # frames before it, so the final ID identifies the whole stack. An
        # empty stack leaves the ID as None.
        prefix_stack_id = None
        for frame in stack:
            frame_id = self._intern_frame(frame)
            prefix_stack_id = self._intern_stack(frame_id, prefix_stack_id)

        self.samples.append(Sample(stack_id=prefix_stack_id,
                                   time_ms=time_ms,
                                   responsiveness=0,
                                   complete_stack=is_complete_stack(stack)))

    def sort_samples(self) -> None:
        """ The samples aren't guaranteed to be in order. Sort them by time. """
        self.samples.sort(key=lambda s: s.time_ms)

    def remove_stack_gaps(self, max_remove_gap_length: int, gap_distr: Dict[int, int]) -> None:
        """ Ideally all callstacks are complete. But some may be broken for different reasons.
            To create a smooth view in "Stack Chart", remove small gaps of broken callstacks.

        A gap is a run of consecutive samples whose complete_stack flag is
        False. Nothing is done, and nothing is counted, when
        max_remove_gap_length is 0.

        Args:
            max_remove_gap_length: the max length of continuous broken-stack samples to remove
            gap_distr: gap length -> number of gaps of that length. Updated in
                place for every gap found, removed or not. Any mutable mapping
                works, including a plain dict or a collections.Counter.
        """
        if max_remove_gap_length == 0:
            return
        samples = self.samples
        sample_count = len(samples)
        remove_flags = [False] * sample_count
        i = 0
        while i < sample_count:
            if samples[i].complete_stack:
                i += 1
                continue
            # samples[i] starts a gap; extend n to cover the whole run.
            n = 1
            while (i + n < sample_count) and (not samples[i + n].complete_stack):
                n += 1
            # .get() instead of "+= 1" so that a plain dict works as well as a Counter.
            gap_distr[n] = gap_distr.get(n, 0) + 1
            if n <= max_remove_gap_length:
                for j in range(i, i + n):
                    remove_flags[j] = True
            i += n
        # Rebuild the list only when something is actually dropped.
        if any(remove_flags):
            self.samples = [s for s, remove in zip(samples, remove_flags) if not remove]

    def to_json_dict(self) -> Dict:
        """Converts this Thread to GeckoThread JSON format.

        The tables are returned by reference, not copied. The marker table is
        always empty.
        """

        # Gecko profile format is row-oriented data as List[List],
        # And a schema for interpreting each index.
        # Schema:
        # https://github.com/firefox-devtools/profiler/blob/main/docs-developer/gecko-profile-format.md
        # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L230
        return {
            "tid": self.tid,
            "pid": self.pid,
            "name": self.comm,
            # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L51
            "markers": {
                "schema": {
                    "name": 0,
                    "startTime": 1,
                    "endTime": 2,
                    "phase": 3,
                    "category": 4,
                    "data": 5,
                },
                "data": [],
            },
            # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L90
            "samples": {
                "schema": {
                    "stack": 0,
                    "time": 1,
                    "responsiveness": 2,
                },
                "data": [s.to_json() for s in self.samples],
            },
            # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L156
            "frameTable": {
                "schema": {
                    "location": 0,
                    "relevantForJS": 1,
                    "innerWindowID": 2,
                    "implementation": 3,
                    "optimizations": 4,
                    "line": 5,
                    "column": 6,
                    "category": 7,
                    "subcategory": 8,
                },
                "data": self.frameTable,
            },
            # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L216
            "stackTable": {
                "schema": {
                    "prefix": 0,
                    "frame": 1,
                    "category": 2,
                },
                "data": self.stackTable,
            },
            "stringTable": self.stringTable,
            "registerTime": 0,
            "unregisterTime": None,
            "processType": "default",
        }
350
351
def remove_stack_gaps(max_remove_gap_length: int, thread_map: Dict[int, Thread]) -> None:
    """Strip short broken-stack gaps from every thread and log a summary.

    Threads left without any sample are deleted from thread_map. Nothing
    happens when max_remove_gap_length is 0.

    Args:
        max_remove_gap_length: the max length of continuous broken-stack
            samples to remove.
        thread_map: tid -> Thread, modified in place.
    """
    if max_remove_gap_length == 0:
        return
    gap_length_counts = Counter()
    samples_seen = 0
    samples_dropped = 0
    # Iterate over a snapshot, since emptied threads are deleted while looping.
    for tid, thread in list(thread_map.items()):
        count_before = len(thread.samples)
        thread.remove_stack_gaps(max_remove_gap_length, gap_length_counts)
        count_after = len(thread.samples)
        samples_seen += count_before
        samples_dropped += count_before - count_after
        if count_after == 0:
            del thread_map[tid]
    if samples_seen == 0:
        return
    logging.info('Remove stack gaps with length <= %d. %d (%.2f%%) samples are removed.',
                 max_remove_gap_length, samples_dropped,
                 samples_dropped / samples_seen * 100)
    logging.debug('Stack gap length distribution among samples (gap_length: count): %s',
                  gap_length_counts)
375
376
def _gecko_profile(
        record_file: str,
        symfs_dir: Optional[str],
        kallsyms_file: Optional[str],
        report_lib_options: ReportLibOptions,
        max_remove_gap_length: int) -> GeckoProfile:
    """Convert a simpleperf profile to the Gecko profile format.

    Args:
        record_file: path of the simpleperf recording to read.
        symfs_dir: passed to ReportLib.SetSymfs() unless None.
        kallsyms_file: passed to ReportLib.SetKallsymsFile() unless None.
        report_lib_options: passed to ReportLib.SetReportOptions().
        max_remove_gap_length: the max length of continuous broken-stack
            samples to remove from each thread; 0 keeps every sample.

    Returns:
        The profile as a JSON-serializable dictionary.
    """
    lib = ReportLib()
    # Map from tid to Thread
    thread_map: Dict[int, Thread] = {}

    try:
        lib.ShowIpForUnknownSymbol()
        if symfs_dir is not None:
            lib.SetSymfs(symfs_dir)
        lib.SetRecordFile(record_file)
        if kallsyms_file is not None:
            lib.SetKallsymsFile(kallsyms_file)
        lib.SetReportOptions(report_lib_options)

        arch = lib.GetArch()
        meta_info = lib.MetaInfo()
        record_cmd = lib.GetRecordCmd()

        while True:
            sample = lib.GetNextSample()
            if sample is None:
                break
            symbol = lib.GetSymbolOfCurrentSample()
            callchain = lib.GetCallChainOfCurrentSample()
            # Dividing by 1e6 yields milliseconds, so sample.time is
            # presumably in nanoseconds.
            sample_time_ms = sample.time / 1000000

            # The sampled symbol comes first, followed by the callchain entries.
            stack = ['%s (in %s)' % (symbol.symbol_name, symbol.dso_name)]
            for i in range(callchain.nr):
                entry = callchain.entries[i]
                stack.append('%s (in %s)' % (entry.symbol.symbol_name, entry.symbol.dso_name))
            # We want root first, leaf last.
            stack.reverse()

            # add thread sample
            thread = thread_map.get(sample.tid)
            if thread is None:
                thread = Thread(comm=sample.thread_comm, pid=sample.pid, tid=sample.tid)
                thread_map[sample.tid] = thread
            thread.add_sample(
                comm=sample.thread_comm,
                stack=stack,
                # We are being a bit fast and loose here with time here.  simpleperf
                # uses CLOCK_MONOTONIC by default, which doesn't use the normal unix
                # epoch, but rather some arbitrary time. In practice, this doesn't
                # matter, the Firefox Profiler normalises all the timestamps to begin at
                # the minimum time.  Consider fixing this in future, if needed, by
                # setting `simpleperf record --clockid realtime`.
                time_ms=sample_time_ms)
    finally:
        # Release the report library on every path, including when setup or
        # sample reading raises.
        lib.Close()

    for thread in thread_map.values():
        thread.sort_samples()

    remove_stack_gaps(max_remove_gap_length, thread_map)

    threads = [thread.to_json_dict() for thread in thread_map.values()]

    profile_timestamp = meta_info.get('timestamp')
    end_time_ms = (int(profile_timestamp) * 1000) if profile_timestamp else 0

    # Schema: https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L305
    gecko_profile_meta = {
        "interval": 1,
        "processType": 0,
        "product": record_cmd,
        "device": meta_info.get("product_props"),
        "platform": meta_info.get("android_build_fingerprint"),
        "stackwalk": 1,
        "debug": 0,
        "gcpoison": 0,
        "asyncstack": 1,
        # The profile timestamp is actually the end time, not the start time.
        # This is close enough for our purposes; I mostly just want to know which
        # day the profile was taken! Consider fixing this in future, if needed,
        # by setting `simpleperf record --clockid realtime` and taking the minimum
        # sample time.
        "startTime": end_time_ms,
        "shutdownTime": None,
        "version": 24,
        "presymbolicated": True,
        "categories": CATEGORIES,
        "markerSchema": [],
        "abi": arch,
        "oscpu": meta_info.get("android_build_fingerprint"),
    }

    # Schema:
    # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L377
    # https://github.com/firefox-devtools/profiler/blob/main/docs-developer/gecko-profile-format.md
    return {
        "meta": gecko_profile_meta,
        "libs": [],
        "threads": threads,
        "processes": [],
        "pausedRanges": [],
    }
479
480
def main() -> None:
    """Parse the command line, convert the recording and write the profile JSON to stdout."""
    arg_parser = BaseArgumentParser(description=__doc__)
    arg_parser.add_argument(
        '--symfs', help='Set the path to find binaries with symbols and debug info.')
    arg_parser.add_argument(
        '--kallsyms', help='Set the path to find kernel symbols.')
    arg_parser.add_argument(
        '-i', '--record_file', nargs='?', default='perf.data', help='Default is perf.data.')
    arg_parser.add_argument(
        '--remove-gaps', metavar='MAX_GAP_LENGTH', dest='max_remove_gap_length',
        type=int, default=3, help="""
                        Ideally all callstacks are complete. But some may be broken for different
                        reasons. To create a smooth view in "Stack Chart", remove small gaps of
                        broken callstacks. MAX_GAP_LENGTH is the max length of continuous
                        broken-stack samples we want to remove.
                        """)
    arg_parser.add_report_lib_options()
    options = arg_parser.parse_args()

    gecko_profile = _gecko_profile(
        record_file=options.record_file,
        symfs_dir=options.symfs,
        kallsyms_file=options.kallsyms,
        report_lib_options=options.report_lib_options,
        max_remove_gap_length=options.max_remove_gap_length,
    )

    # Keys are sorted when the profile is serialized.
    json.dump(gecko_profile, sys.stdout, sort_keys=True)
506
507
# Run the converter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
510