1 /* 2 * Copyright (C) 2023 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef SRC_TRACE_PROCESSOR_DB_COLUMN_DATA_LAYER_H_ 18 #define SRC_TRACE_PROCESSOR_DB_COLUMN_DATA_LAYER_H_ 19 20 #include <cstdint> 21 #include <memory> 22 #include <optional> 23 #include <string> 24 #include <utility> 25 #include <vector> 26 27 #include "perfetto/base/compiler.h" 28 #include "perfetto/base/logging.h" 29 #include "perfetto/trace_processor/basic_types.h" 30 #include "perfetto/trace_processor/ref_counted.h" 31 #include "src/trace_processor/db/column/types.h" 32 33 namespace perfetto::protos::pbzero { 34 class SerializedColumn_Storage; 35 } 36 37 namespace perfetto::trace_processor::column { 38 class DataLayerChain; 39 40 // Data structure which either directly or indirectly (i.e. by transforming 41 // the contents of another DataLayer) provides the data of a column of a 42 // table. 43 class DataLayer : public RefCounted { 44 public: 45 // Arguments for MakeChain on how the inner chain should be interpreted. 46 struct ChainCreationArgs { 47 constexpr explicit ChainCreationArgs( 48 bool _does_layer_order_chain_contents = false) does_layer_order_chain_contentsChainCreationArgs49 : does_layer_order_chain_contents(_does_layer_order_chain_contents) {} 50 51 // Indicates whether the current data layer orders the inner chain. 52 // Currently used by ArrangementOverlay to decide whether the arrangement 53 // orders a given chain. 54 bool does_layer_order_chain_contents; 55 }; 56 virtual ~DataLayer(); 57 58 // Creates a DataLayerChain for a terminal DataLayer. This means the 59 // DataLayer directly should return the data it contains inside. 60 std::unique_ptr<DataLayerChain> MakeChain(); 61 62 // Creates a DataLayerChain for a non-terminal DataLayer. This means 63 // the DataLayer should transform the contents of the inner chain. 64 std::unique_ptr<DataLayerChain> MakeChain( 65 std::unique_ptr<DataLayerChain>, 66 ChainCreationArgs = ChainCreationArgs()); 67 68 protected: 69 // TODO(b/325583551): remove this when possible. 70 enum class Impl { 71 kArrangement, 72 kDenseNull, 73 kDummy, 74 kId, 75 kNull, 76 kNumericDouble, 77 kNumericUint32, 78 kNumericInt32, 79 kNumericInt64, 80 kRange, 81 kSelector, 82 kSetId, 83 kString, 84 }; DataLayer(Impl impl)85 explicit DataLayer(Impl impl) : impl_(impl) {} 86 87 private: 88 Impl impl_; 89 }; 90 91 // Corresponds to a series of DataLayer chained together. Provides 92 // functionality for querying the transformed data of the entire chain. 93 class DataLayerChain { 94 public: 95 // Indicates the direction of the sort on a single chain. 96 enum class SortDirection { 97 kAscending, 98 kDescending, 99 }; 100 // Struct wrapping indices to elements of this chain. Passed to sorting 101 // functions. 102 struct SortToken { 103 // An index pointing to an element in this chain. Indicates the element 104 // at this index should be compared. 105 uint32_t index; 106 107 // An opaque value which can be set to some value meaningful to the 108 // caller. Implementations *should not* read at this value. 109 uint32_t payload; 110 }; 111 using StorageProto = protos::pbzero::SerializedColumn_Storage; 112 113 // Index vector related data required to Filter using IndexSearch. 114 struct Indices { 115 enum class State { 116 // We can't guarantee that data is in monotonic order. 117 kNonmonotonic, 118 // Data is in monotonic order. 119 kMonotonic, 120 }; CreateIndices121 static Indices Create(const std::vector<uint32_t>& raw, State state) { 122 std::vector<Token> tokens; 123 tokens.reserve(raw.size()); 124 for (auto r : raw) { 125 tokens.push_back({r, r}); 126 } 127 return Indices{std::move(tokens), state}; 128 } CreateWithIndexPayloadForTestingIndices129 static Indices CreateWithIndexPayloadForTesting( 130 const std::vector<uint32_t>& raw, 131 State state) { 132 std::vector<Token> tokens; 133 tokens.reserve(raw.size()); 134 for (uint32_t i = 0; i < raw.size(); ++i) { 135 tokens.push_back(Token{raw[i], i}); 136 } 137 return Indices{std::move(tokens), state}; 138 } 139 std::vector<Token> tokens; 140 State state = State::kNonmonotonic; 141 }; 142 143 // Index vector related data required to Filter using IndexSearch. 144 struct OrderedIndices { 145 const uint32_t* data = nullptr; 146 uint32_t size = 0; 147 Indices::State state = Indices::State::kNonmonotonic; 148 }; 149 150 virtual ~DataLayerChain(); 151 152 // Start of public API. 153 154 // Checks whether element at the provided index match |op| and |value|. 155 // 156 // Returns true if the element matches, false otherwise. 157 virtual SingleSearchResult SingleSearch(FilterOp op, 158 SqlValue value, 159 uint32_t row) const = 0; 160 161 // Searches for elements which match |op| and |value| between |range.start| 162 // and |range.end|. 163 // 164 // Returns either a range or BitVector which indicate the positions in 165 // |range| which match the constraint. If a BitVector is returned, it will 166 // be *precisely* as large as |range.end|. 167 // 168 // Notes for callers: 169 // * Callers should note that the return value of this function corresponds 170 // to positions in the storage. 171 // 172 // Notes for implementors: 173 // * Implementations should ensure that the return value is empty or *only* 174 // includes positions in |range|. Callers are free to assume this and can 175 // optimize based on it. 176 // * Implementations should ensure that, if they return a BitVector, it is 177 // precisely of size |range.end|. Search(FilterOp op,SqlValue value,Range range)178 PERFETTO_ALWAYS_INLINE RangeOrBitVector Search(FilterOp op, 179 SqlValue value, 180 Range range) const { 181 PERFETTO_DCHECK(range.end <= size()); 182 switch (ValidateSearchConstraints(op, value)) { 183 case SearchValidationResult::kAllData: 184 return RangeOrBitVector(range); 185 case SearchValidationResult::kNoData: 186 return RangeOrBitVector(Range()); 187 case SearchValidationResult::kOk: 188 return SearchValidated(op, value, range); 189 } 190 PERFETTO_FATAL("For GCC"); 191 } 192 193 // Searches for elements which match |op| and |value| at the positions given 194 // by |indices| array. 195 // 196 // Returns either a range of BitVector which indicate the positions in 197 // |indices| which match the constraint. If a BitVector is returned, it will 198 // be *precisely* as large as |indices_count|. 199 // 200 // Notes for callers: 201 // * Callers should note that the return value of this function corresponds 202 // to positions in |indices| *not* positions in the storage. 203 // 204 // Notes for implementors: 205 // * Implementations should ensure that, if they return a BitVector, it is 206 // precisely of size |indices_count|. IndexSearch(FilterOp op,SqlValue value,Indices & indices)207 PERFETTO_ALWAYS_INLINE void IndexSearch(FilterOp op, 208 SqlValue value, 209 Indices& indices) const { 210 switch (ValidateSearchConstraints(op, value)) { 211 case SearchValidationResult::kAllData: 212 return; 213 case SearchValidationResult::kNoData: 214 indices.tokens.clear(); 215 return; 216 case SearchValidationResult::kOk: 217 IndexSearchValidated(op, value, indices); 218 return; 219 } 220 PERFETTO_FATAL("For GCC"); 221 } 222 223 // Searches for elements which match |op| and |value| at the positions given 224 // by OrderedIndicesdata. 225 // 226 // Returns a Range into OrderedIndicesdata of OrderedIndicesthat pass the 227 // constraint. 228 // 229 // Notes for callers: 230 // * Should not be called on: 231 // - kGlob and kRegex as those operations can't use the sorted state 232 // hence they can't return a Range. 233 // - kNe as this is inherently unsorted. Use kEq and then reverse the 234 // result. 235 // * Callers should note that the return value of this function corresponds 236 // to positions in |indices| *not* positions in the storage. 237 PERFETTO_ALWAYS_INLINE Range OrderedIndexSearch(FilterOp op,SqlValue value,const OrderedIndices & indices)238 OrderedIndexSearch(FilterOp op, 239 SqlValue value, 240 const OrderedIndices& indices) const { 241 switch (ValidateSearchConstraints(op, value)) { 242 case SearchValidationResult::kAllData: 243 return {0, indices.size}; 244 case SearchValidationResult::kNoData: 245 return {}; 246 case SearchValidationResult::kOk: 247 return OrderedIndexSearchValidated(op, value, indices); 248 } 249 PERFETTO_FATAL("For GCC"); 250 } 251 252 // Stable sorts an array of SortToken elements between |start| and |end| 253 // using a comparator defined by looking up the elements in this chain using 254 // the index given by SortToken::index. |direction| indicates the direction of 255 // the sort (ascending or descending). 256 // 257 // In simple terms the expectation is for implementations do something like: 258 // ``` 259 // std::stable_sort(start, index, [](const SortToken& a, const SortToken& b) { 260 // return Get(a.index) < Get(b.index); 261 // }); 262 // ``` 263 // with |Get| being a function to lookup the element in this chain. 264 virtual void StableSort(SortToken* start, 265 SortToken* end, 266 SortDirection direction) const = 0; 267 268 // Removes all indices pointing to values that are duplicates, as a result the 269 // indices will only point to distinct (not duplicated) values. 270 // 271 // Notes for implementors: 272 // * Each layer that might introduce duplicates is responsible for removing 273 // them. 274 virtual void Distinct(Indices&) const = 0; 275 276 // After calling this function Indices will have at most one element. If 277 // present it will point to the first index with the largest value in the 278 // chain. 279 virtual std::optional<Token> MaxElement(Indices&) const = 0; 280 281 // After calling this function Indices will have at most one element. If 282 // present it will point to the first index with the smallest value in the 283 // chain. 284 virtual std::optional<Token> MinElement(Indices&) const = 0; 285 286 // Serializes storage data to proto format. 287 virtual void Serialize(StorageProto*) const = 0; 288 289 // Returns a string which represents the column for debugging purposes. 290 // 291 // Warning: the format of the string returned by this class is *not* stable 292 // and should be relied upon for anything except printing for debugging 293 // purposes. 294 virtual std::string DebugString() const = 0; 295 296 // Number of elements in stored data. 297 virtual uint32_t size() const = 0; 298 299 // End of public API. 300 // The below methods might be public but are only intended for implementations 301 // of DataLayerChain. 302 303 // Verifies whether any further filtering is needed and if not, whether the 304 // search would return all values or none of them. This allows for skipping 305 // the |Search| and |IndexSearch| in special cases. 306 // 307 // Notes for callers: 308 // * The SqlValue and FilterOp have to be valid in Sqlite: it will crash if 309 // either: value is NULL and operation is different than "IS NULL" and "IS 310 // NOT NULL" or the operation is "IS NULL" or "IS NOT NULL" and value is 311 // different than NULL. 312 virtual SearchValidationResult ValidateSearchConstraints(FilterOp, 313 SqlValue) const = 0; 314 315 // Post-validated implementation of |Search|. See |Search|'s documentation. 316 virtual RangeOrBitVector SearchValidated(FilterOp, SqlValue, Range) const = 0; 317 318 // Post-validated implementation of |IndexSearch|. See |IndexSearch|'s 319 // documentation. 320 virtual void IndexSearchValidated(FilterOp, SqlValue, Indices&) const = 0; 321 322 // Post-validated implementation of |OrderedIndexSearch|. See 323 // |OrderedIndexSearch|'s documentation. 324 Range OrderedIndexSearchValidated(FilterOp op, 325 SqlValue value, 326 const OrderedIndices& indices) const; 327 328 // Returns the SqlValue representing the value at a given index. 329 // 330 // This function might be very tempting to use as it appears cheap on the 331 // surface but because of how DataLayerChains might be layered on top of each 332 // other, this might require *several* virtual function calls per index. 333 // If you're tempted to use this, please consider instead create a new 334 // "vectorized" function instead and only using this as a last resort. 335 // 336 // The correct "class" of algorithms to use this function are cases where you 337 // have a set of indices you want to lookup and based on the value returned 338 // you will only use a fraction of them. In this case, it might be worth 339 // paying the non-vectorized lookup to vastly reduce how many indices need 340 // to be translated. 341 // 342 // An example of such an algorithm is binary search on indices. 343 virtual SqlValue Get_AvoidUsingBecauseSlow(uint32_t index) const = 0; 344 }; 345 346 } // namespace perfetto::trace_processor::column 347 348 #endif // SRC_TRACE_PROCESSOR_DB_COLUMN_DATA_LAYER_H_ 349