# Copyright (C) 2024 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import itertools
from typing import List, Sized, Union

from perfetto.common.exceptions import PerfettoException

# pandas and numpy are optional. ModuleNotFoundError is a subclass of
# ImportError, so catching ImportError covers both failure modes.
try:
  import pandas as pd
  HAS_PANDAS = True
except ImportError:
  HAS_PANDAS = False

try:
  import numpy as np
  HAS_NUMPY = True
except ImportError:
  HAS_NUMPY = False

# Values of these constants correspond to the QueryResponse message at
# protos/perfetto/trace_processor/trace_processor.proto
QUERY_CELL_INVALID_FIELD_ID = 0
QUERY_CELL_NULL_FIELD_ID = 1
QUERY_CELL_VARINT_FIELD_ID = 2
QUERY_CELL_FLOAT64_FIELD_ID = 3
QUERY_CELL_STRING_FIELD_ID = 4
QUERY_CELL_BLOB_FIELD_ID = 5
QUERY_CELL_TYPE_COUNT = 6


# Splits the NUL-terminated contents of a batch's string_cells field into a
# list of strings, one entry per terminator. Anything after the final NUL
# (normally just the empty remainder) is dropped.
def _extract_strings(x: Union[str, bytes]) -> List[str]:
  # It's possible on some occasions that there are non UTF-8 characters
  # in the string_cells field. If this is the case, string_cells is
  # a bytestring which needs to be decoded (but passing ignore so that
  # we don't fail in decoding).
  try:
    text: str = x.decode('utf-8', 'ignore')
  except AttributeError:
    # AttributeError can occur when |x| is an str which happens when everything
    # in it is UTF-8 (protobuf automatically does the conversion if it can).
    text = x
  # str.split always returns at least one element, so slicing off the last
  # one is equivalent to popping it.
  return text.split('\0')[:-1]


# Provides a Python interface to operate on the contents of QueryResult protos
class QueryResultIterator(Sized):
  # This is the class returned to the user and contains one row of the
  # resultant query. Each column name is stored as an attribute of this
  # class, with the value corresponding to the column name and row in
  # the query results table.
  class Row(object):
    # Required for pytype to correctly infer attributes from Row objects
    _HAS_DYNAMIC_ATTRIBUTES = True

    def __str__(self):
      return str(self.__dict__)

    def __repr__(self):
      # __repr__ must return a str; returning the dict itself makes
      # repr(row) raise TypeError.
      return repr(self.__dict__)

  # Flattens the cells of all |batches| into self.cells in row-major order.
  #
  # column_names: names of the result columns, in order.
  # batches: batch messages exposing cells, varint_cells, float64_cells,
  #   string_cells, blob_cells and is_last_batch.
  #
  # Raises PerfettoException if the final batch is not flagged as last, or
  # if a batch's cell count is not a multiple of the column count.
  def __init__(self, column_names: List[str], batches: List):
    self.column_names = list(column_names)
    self.column_count = len(column_names)

    if batches and not batches[-1].is_last_batch:
      raise PerfettoException('Last batch did not have is_last_batch flag set')

    self.cell_count = sum(len(b.cells) for b in batches)
    for b in batches:
      # NOTE(review): the check is per batch but the message reports the
      # total cell count; left unchanged in case callers match on the text.
      if self.column_count > 0 and len(b.cells) % self.column_count != 0:
        raise PerfettoException(
            f"Result has {self.cell_count} cells, not divisible by {self.column_count} columns"
        )

    self.row_count = (
        self.cell_count // self.column_count if self.column_count > 0 else 0)
    # Index into self.cells of the first cell of the next row to yield.
    self.cell_index = 0

    if HAS_NUMPY:
      self.cells = np.empty(self.cell_count, dtype='object')
      filled = 0
      for b in batches:
        cell_types = np.array(b.cells)
        batch_size = len(cell_types)
        # |window| is a view, so masked assignments below write straight
        # into self.cells.
        window = self.cells[filled:filled + batch_size]
        window[cell_types == QUERY_CELL_NULL_FIELD_ID] = None
        window[cell_types == QUERY_CELL_VARINT_FIELD_ID] = b.varint_cells
        window[cell_types == QUERY_CELL_FLOAT64_FIELD_ID] = b.float64_cells
        window[cell_types == QUERY_CELL_STRING_FIELD_ID] = _extract_strings(
            b.string_cells)
        window[cell_types == QUERY_CELL_BLOB_FIELD_ID] = b.blob_cells
        filled += batch_size
    else:
      self.cells = [None] * self.cell_count
      # Indexed by cell type id; invalid and null cells carry no payload.
      values_by_type = [
          [],
          [],
          list(itertools.chain.from_iterable(b.varint_cells for b in batches)),
          list(
              itertools.chain.from_iterable(b.float64_cells for b in batches)),
          list(
              itertools.chain.from_iterable(
                  _extract_strings(b.string_cells) for b in batches)),
          list(itertools.chain.from_iterable(b.blob_cells for b in batches)),
      ]
      # Next unread position in each per-type value list.
      cell_offsets = [0] * (QUERY_CELL_BLOB_FIELD_ID + 1)
      for i, ct in enumerate(
          itertools.chain.from_iterable(b.cells for b in batches)):
        # Null cells keep the None that self.cells was pre-filled with.
        if ct != QUERY_CELL_NULL_FIELD_ID:
          self.cells[i] = values_by_type[ct][cell_offsets[ct]]
        cell_offsets[ct] += 1

  # To use the query result as a populated Pandas dataframe, this
  # function must be called directly after calling query inside
  # TraceProcessor / Bigtrace.
  #
  # Raises PerfettoException if pandas or numpy is not installed.
  def as_pandas_dataframe(self):
    if HAS_PANDAS and HAS_NUMPY:
      assert isinstance(self.cells, np.ndarray)
      return pd.DataFrame(
          self.cells.reshape((self.row_count, self.column_count)),
          columns=self.column_names)
    else:
      raise PerfettoException(
          'pandas/numpy dependency missing. Please run `pip3 install pandas numpy`'
      )

  # Number of rows in the result.
  def __len__(self):
    return self.row_count

  # The object is its own iterator, so it can only be traversed once.
  def __iter__(self):
    return self

  # Returns the next row as a Row whose attributes are named after the
  # columns; raises StopIteration once every cell has been consumed.
  def __next__(self):
    # With zero columns cell_index can never advance, so stop immediately
    # instead of yielding empty rows forever.
    if self.column_count == 0 or self.cell_index >= self.cell_count:
      raise StopIteration
    result = QueryResultIterator.Row()
    for i, column_name in enumerate(self.column_names):
      setattr(result, column_name, self.cells[self.cell_index + i])
    self.cell_index += self.column_count
    return result