• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright (C) 2024 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#      http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15import itertools
16from typing import List, Sized, Union
17
18from perfetto.common.exceptions import PerfettoException
19
20try:
21  import pandas as pd
22  HAS_PANDAS = True
23except ModuleNotFoundError:
24  HAS_PANDAS = False
25except ImportError:
26  HAS_PANDAS = False
27
28try:
29  import numpy as np
30  HAS_NUMPY = True
31except ModuleNotFoundError:
32  HAS_NUMPY = False
33except ImportError:
34  HAS_NUMPY = False
35
# Values of these constants correspond to the QueryResponse message at
# protos/perfetto/trace_processor/trace_processor.proto
#
# Every entry of a batch's |cells| list is one of these ids; it names the
# per-type container of the batch that holds the value of that cell.
# No value container is associated with this id in the visible code.
QUERY_CELL_INVALID_FIELD_ID = 0
# The cell carries no value and is exposed to the user as None.
QUERY_CELL_NULL_FIELD_ID = 1
# The value is taken from the batch's |varint_cells|.
QUERY_CELL_VARINT_FIELD_ID = 2
# The value is taken from the batch's |float64_cells|.
QUERY_CELL_FLOAT64_FIELD_ID = 3
# The value is taken from the batch's NUL-delimited |string_cells|.
QUERY_CELL_STRING_FIELD_ID = 4
# The value is taken from the batch's |blob_cells|.
QUERY_CELL_BLOB_FIELD_ID = 5
# Not referenced in the visible code; presumably the number of ids above.
# TODO confirm against the proto.
QUERY_CELL_TYPE_COUNT = 6
45
46
47def _extract_strings(x: Union[str, bytes]):
48  # It's possible on some occasions that there are non UTF-8 characters
49  # in the string_cells field. If this is the case, string_cells is
50  # a bytestring which needs to be decoded (but passing ignore so that
51  # we don't fail in decoding).
52  try:
53    input: str = x.decode('utf-8', 'ignore')
54  except AttributeError:
55    # AttributeError can occur when |x| is an str which happens when everything
56    # in it is UTF-8 (protobuf automatically does the conversion if it can).
57    input: str = x
58  res = input.split('\0')
59  if res:
60    res.pop()
61  return res
62
63
# Provides a Python interface to operate on the contents of QueryResult protos.
#
# All cells of all batches are flattened into |cells| in row-major order, i.e.
# the cell of column |c| in row |r| lives at index r * column_count + c.
# The object is its own iterator: __iter__ returns self and __next__ advances
# the |cell_index| cursor, so the rows can be iterated over only once.
class QueryResultIterator(Sized):
  # This is the class returned to the user and contains one row of the
  # resultant query. Each column name is stored as an attribute of this
  # class, with the value corresponding to the column name and row in
  # the query results table.
  class Row(object):
    # Required for pytype to correctly infer attributes from Row objects
    _HAS_DYNAMIC_ATTRIBUTES = True

    def __str__(self):
      return str(self.__dict__)

    def __repr__(self):
      # __repr__ has to return an str. Handing back the dict itself makes
      # repr(row) raise a TypeError.
      return repr(self.__dict__)

  # Validates the batches and builds the flat |cells| array out of them.
  #
  # column_names: names of the result columns, in column order.
  # batches: the result batches. From each one this reads |cells| (the list
  #   of cell type ids) and the value containers |varint_cells|,
  #   |float64_cells|, |string_cells| and |blob_cells|; from the final one it
  #   also reads |is_last_batch|.
  #
  # Raises PerfettoException if the final batch is not flagged as the last
  # one, or if the cell count of any batch is not a multiple of the number of
  # columns.
  def __init__(self, column_names: List[str], batches: List):
    self.column_names = list(column_names)
    self.column_count = len(column_names)

    if batches and not batches[-1].is_last_batch:
      raise PerfettoException('Last batch did not have is_last_batch flag set')

    self.cell_count = sum(len(b.cells) for b in batches)
    for b in batches:
      # The check is done per batch, but the message reports the total number
      # of cells across all batches.
      if self.column_count > 0 and len(b.cells) % self.column_count != 0:
        raise PerfettoException(
            f"Result has {self.cell_count} cells, not divisible by {self.column_count} columns"
        )

    self.row_count = self.cell_count // self.column_count if self.column_count > 0 else 0
    # Cursor of __next__: index in |cells| of the first cell of the next row.
    self.cell_index = 0

    if HAS_NUMPY:
      self.cells = np.empty(self.cell_count, dtype='object')
      cell_count = 0
      for b in batches:
        all_cells = np.array(b.cells)
        all_cells_count = len(all_cells)
        # |cut| is a slice of |self.cells| covering this batch, so the masked
        # assignments below fill |self.cells| in place.
        cut = self.cells[cell_count:cell_count + all_cells_count]
        # Each mask selects the cells of one type. The values of a container
        # are assigned to the selected positions in order, so the n-th value
        # of a container ends up in the n-th cell of the matching type.
        cut[all_cells == QUERY_CELL_NULL_FIELD_ID] = None
        cut[all_cells == QUERY_CELL_VARINT_FIELD_ID] = b.varint_cells
        cut[all_cells == QUERY_CELL_FLOAT64_FIELD_ID] = b.float64_cells
        cut[all_cells == QUERY_CELL_STRING_FIELD_ID] = _extract_strings(
            b.string_cells)
        cut[all_cells == QUERY_CELL_BLOB_FIELD_ID] = b.blob_cells
        cell_count += all_cells_count
    else:
      self.cells = [None] * self.cell_count
      # Values of all batches concatenated per type and indexed by cell type
      # id. The invalid and null ids have no values, hence the empty lists.
      cells = [
          [],
          [],
          list(itertools.chain.from_iterable(b.varint_cells for b in batches)),
          list(itertools.chain.from_iterable(b.float64_cells for b in batches)),
          list(
              itertools.chain.from_iterable(
                  _extract_strings(b.string_cells) for b in batches)),
          list(itertools.chain.from_iterable(b.blob_cells for b in batches)),
      ]
      # Per type id, the index of the next value not yet consumed from the
      # matching list in |cells|.
      cell_offsets = [0] * (QUERY_CELL_BLOB_FIELD_ID + 1)
      for i, ct in enumerate(
          itertools.chain.from_iterable(b.cells for b in batches)):
        self.cells[i] = cells[ct][
            cell_offsets[ct]] if ct != QUERY_CELL_NULL_FIELD_ID else None
        cell_offsets[ct] += 1

  # To use the query result as a populated Pandas dataframe, this
  # function must be called directly after calling query inside
  # TraceProcessor / Bigtrace.
  #
  # Returns a DataFrame with one row per result row and one column per entry
  # of |column_names|. It is built from all the cells, independently of how
  # far the iteration cursor has advanced.
  #
  # Raises PerfettoException if pandas or numpy could not be imported.
  def as_pandas_dataframe(self):
    if HAS_PANDAS and HAS_NUMPY:
      assert isinstance(self.cells, np.ndarray)
      return pd.DataFrame(
          self.cells.reshape((self.row_count, self.column_count)),
          columns=self.column_names)
    else:
      raise PerfettoException(
          'pandas/numpy dependency missing. Please run `pip3 install pandas numpy`'
      )

  # Returns the number of rows of the result (not the number of rows left to
  # iterate over).
  def __len__(self):
    return self.row_count

  def __iter__(self):
    return self

  # Returns the next row as a Row object which has one attribute per column,
  # and raises StopIteration once all the cells have been consumed.
  def __next__(self):
    # A result without columns has no rows (|row_count| is 0). Stop right away
    # in that case: the cursor would otherwise advance by zero cells per call
    # and the iteration would never end.
    if self.column_count == 0 or self.cell_index >= self.cell_count:
      raise StopIteration
    result = QueryResultIterator.Row()
    for i, column_name in enumerate(self.column_names):
      setattr(result, column_name, self.cells[self.cell_index + i])
    self.cell_index += self.column_count
    return result
158