• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2023 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef SRC_TRACE_PROCESSOR_DB_COLUMN_DATA_LAYER_H_
18 #define SRC_TRACE_PROCESSOR_DB_COLUMN_DATA_LAYER_H_
19 
20 #include <cstdint>
21 #include <memory>
22 #include <optional>
23 #include <string>
24 #include <utility>
25 #include <vector>
26 
27 #include "perfetto/base/compiler.h"
28 #include "perfetto/base/logging.h"
29 #include "perfetto/trace_processor/basic_types.h"
30 #include "perfetto/trace_processor/ref_counted.h"
31 #include "src/trace_processor/db/column/types.h"
32 
33 namespace perfetto::protos::pbzero {
34 class SerializedColumn_Storage;
35 }
36 
37 namespace perfetto::trace_processor::column {
38 class DataLayerChain;
39 
40 // Data structure which either directly or indirectly (i.e. by transforming
41 // the contents of another DataLayer) provides the data of a column of a
42 // table.
43 class DataLayer : public RefCounted {
44  public:
45   // Arguments for MakeChain on how the inner chain should be interpreted.
46   struct ChainCreationArgs {
47     constexpr explicit ChainCreationArgs(
48         bool _does_layer_order_chain_contents = false)
does_layer_order_chain_contentsChainCreationArgs49         : does_layer_order_chain_contents(_does_layer_order_chain_contents) {}
50 
51     // Indicates whether the current data layer orders the inner chain.
52     // Currently used by ArrangementOverlay to decide whether the arrangement
53     // orders a given chain.
54     bool does_layer_order_chain_contents;
55   };
56   virtual ~DataLayer();
57 
58   // Creates a DataLayerChain for a terminal DataLayer. This means the
59   // DataLayer directly should return the data it contains inside.
60   std::unique_ptr<DataLayerChain> MakeChain();
61 
62   // Creates a DataLayerChain for a non-terminal DataLayer. This means
63   // the DataLayer should transform the contents of the inner chain.
64   std::unique_ptr<DataLayerChain> MakeChain(
65       std::unique_ptr<DataLayerChain>,
66       ChainCreationArgs = ChainCreationArgs());
67 
68  protected:
69   // TODO(b/325583551): remove this when possible.
70   enum class Impl {
71     kArrangement,
72     kDenseNull,
73     kDummy,
74     kId,
75     kNull,
76     kNumericDouble,
77     kNumericUint32,
78     kNumericInt32,
79     kNumericInt64,
80     kRange,
81     kSelector,
82     kSetId,
83     kString,
84   };
DataLayer(Impl impl)85   explicit DataLayer(Impl impl) : impl_(impl) {}
86 
87  private:
88   Impl impl_;
89 };
90 
91 // Corresponds to a series of DataLayer chained together. Provides
92 // functionality for querying the transformed data of the entire chain.
93 class DataLayerChain {
94  public:
95   // Indicates the direction of the sort on a single chain.
96   enum class SortDirection {
97     kAscending,
98     kDescending,
99   };
100   // Struct wrapping indices to elements of this chain. Passed to sorting
101   // functions.
102   struct SortToken {
103     // An index pointing to an element in this chain. Indicates the element
104     // at this index should be compared.
105     uint32_t index;
106 
107     // An opaque value which can be set to some value meaningful to the
108     // caller. Implementations *should not* read at this value.
109     uint32_t payload;
110   };
111   using StorageProto = protos::pbzero::SerializedColumn_Storage;
112 
113   // Index vector related data required to Filter using IndexSearch.
114   struct Indices {
115     enum class State {
116       // We can't guarantee that data is in monotonic order.
117       kNonmonotonic,
118       // Data is in monotonic order.
119       kMonotonic,
120     };
CreateIndices121     static Indices Create(const std::vector<uint32_t>& raw, State state) {
122       std::vector<Token> tokens;
123       tokens.reserve(raw.size());
124       for (auto r : raw) {
125         tokens.push_back({r, r});
126       }
127       return Indices{std::move(tokens), state};
128     }
CreateWithIndexPayloadForTestingIndices129     static Indices CreateWithIndexPayloadForTesting(
130         const std::vector<uint32_t>& raw,
131         State state) {
132       std::vector<Token> tokens;
133       tokens.reserve(raw.size());
134       for (uint32_t i = 0; i < raw.size(); ++i) {
135         tokens.push_back(Token{raw[i], i});
136       }
137       return Indices{std::move(tokens), state};
138     }
139     std::vector<Token> tokens;
140     State state = State::kNonmonotonic;
141   };
142 
143   // Index vector related data required to Filter using IndexSearch.
144   struct OrderedIndices {
145     const uint32_t* data = nullptr;
146     uint32_t size = 0;
147     Indices::State state = Indices::State::kNonmonotonic;
148   };
149 
150   virtual ~DataLayerChain();
151 
152   // Start of public API.
153 
154   // Checks whether element at the provided index match |op| and |value|.
155   //
156   // Returns true if the element matches, false otherwise.
157   virtual SingleSearchResult SingleSearch(FilterOp op,
158                                           SqlValue value,
159                                           uint32_t row) const = 0;
160 
161   // Searches for elements which match |op| and |value| between |range.start|
162   // and |range.end|.
163   //
164   // Returns either a range or BitVector which indicate the positions in
165   // |range| which match the constraint. If a BitVector is returned, it will
166   // be *precisely* as large as |range.end|.
167   //
168   // Notes for callers:
169   //  * Callers should note that the return value of this function corresponds
170   //    to positions in the storage.
171   //
172   // Notes for implementors:
173   //  * Implementations should ensure that the return value is empty or *only*
174   //    includes positions in |range|. Callers are free to assume this and can
175   //    optimize based on it.
176   //  * Implementations should ensure that, if they return a BitVector, it is
177   //    precisely of size |range.end|.
Search(FilterOp op,SqlValue value,Range range)178   PERFETTO_ALWAYS_INLINE RangeOrBitVector Search(FilterOp op,
179                                                  SqlValue value,
180                                                  Range range) const {
181     PERFETTO_DCHECK(range.end <= size());
182     switch (ValidateSearchConstraints(op, value)) {
183       case SearchValidationResult::kAllData:
184         return RangeOrBitVector(range);
185       case SearchValidationResult::kNoData:
186         return RangeOrBitVector(Range());
187       case SearchValidationResult::kOk:
188         return SearchValidated(op, value, range);
189     }
190     PERFETTO_FATAL("For GCC");
191   }
192 
193   // Searches for elements which match |op| and |value| at the positions given
194   // by |indices| array.
195   //
196   // Returns either a range of BitVector which indicate the positions in
197   // |indices| which match the constraint. If a BitVector is returned, it will
198   // be *precisely* as large as |indices_count|.
199   //
200   // Notes for callers:
201   //  * Callers should note that the return value of this function corresponds
202   //    to positions in |indices| *not* positions in the storage.
203   //
204   // Notes for implementors:
205   //  * Implementations should ensure that, if they return a BitVector, it is
206   //    precisely of size |indices_count|.
IndexSearch(FilterOp op,SqlValue value,Indices & indices)207   PERFETTO_ALWAYS_INLINE void IndexSearch(FilterOp op,
208                                           SqlValue value,
209                                           Indices& indices) const {
210     switch (ValidateSearchConstraints(op, value)) {
211       case SearchValidationResult::kAllData:
212         return;
213       case SearchValidationResult::kNoData:
214         indices.tokens.clear();
215         return;
216       case SearchValidationResult::kOk:
217         IndexSearchValidated(op, value, indices);
218         return;
219     }
220     PERFETTO_FATAL("For GCC");
221   }
222 
223   // Searches for elements which match |op| and |value| at the positions given
224   // by OrderedIndicesdata.
225   //
226   // Returns a Range into OrderedIndicesdata of OrderedIndicesthat pass the
227   // constraint.
228   //
229   // Notes for callers:
230   //  * Should not be called on:
231   //    - kGlob and kRegex as those operations can't use the sorted state
232   //      hence they can't return a Range.
233   //    - kNe as this is inherently unsorted. Use kEq and then reverse the
234   //      result.
235   //  * Callers should note that the return value of this function corresponds
236   //    to positions in |indices| *not* positions in the storage.
237   PERFETTO_ALWAYS_INLINE Range
OrderedIndexSearch(FilterOp op,SqlValue value,const OrderedIndices & indices)238   OrderedIndexSearch(FilterOp op,
239                      SqlValue value,
240                      const OrderedIndices& indices) const {
241     switch (ValidateSearchConstraints(op, value)) {
242       case SearchValidationResult::kAllData:
243         return {0, indices.size};
244       case SearchValidationResult::kNoData:
245         return {};
246       case SearchValidationResult::kOk:
247         return OrderedIndexSearchValidated(op, value, indices);
248     }
249     PERFETTO_FATAL("For GCC");
250   }
251 
252   // Stable sorts an array of SortToken elements between |start| and |end|
253   // using a comparator defined by looking up the elements in this chain using
254   // the index given by SortToken::index. |direction| indicates the direction of
255   // the sort (ascending or descending).
256   //
257   // In simple terms the expectation is for implementations do something like:
258   // ```
259   // std::stable_sort(start, index, [](const SortToken& a, const SortToken& b) {
260   //  return Get(a.index) < Get(b.index);
261   // });
262   // ```
263   // with |Get| being a function to lookup the element in this chain.
264   virtual void StableSort(SortToken* start,
265                           SortToken* end,
266                           SortDirection direction) const = 0;
267 
268   // Removes all indices pointing to values that are duplicates, as a result the
269   // indices will only point to distinct (not duplicated) values.
270   //
271   // Notes for implementors:
272   // * Each layer that might introduce duplicates is responsible for removing
273   // them.
274   virtual void Distinct(Indices&) const = 0;
275 
276   // After calling this function Indices will have at most one element. If
277   // present it will point to the first index with the largest value in the
278   // chain.
279   virtual std::optional<Token> MaxElement(Indices&) const = 0;
280 
281   // After calling this function Indices will have at most one element. If
282   // present it will point to the first index with the smallest value in the
283   // chain.
284   virtual std::optional<Token> MinElement(Indices&) const = 0;
285 
286   // Serializes storage data to proto format.
287   virtual void Serialize(StorageProto*) const = 0;
288 
289   // Returns a string which represents the column for debugging purposes.
290   //
291   // Warning: the format of the string returned by this class is *not* stable
292   // and should be relied upon for anything except printing for debugging
293   // purposes.
294   virtual std::string DebugString() const = 0;
295 
296   // Number of elements in stored data.
297   virtual uint32_t size() const = 0;
298 
299   // End of public API.
300   // The below methods might be public but are only intended for implementations
301   // of DataLayerChain.
302 
303   // Verifies whether any further filtering is needed and if not, whether the
304   // search would return all values or none of them. This allows for skipping
305   // the |Search| and |IndexSearch| in special cases.
306   //
307   // Notes for callers:
308   // * The SqlValue and FilterOp have to be valid in Sqlite: it will crash if
309   //   either: value is NULL and operation is different than "IS NULL" and "IS
310   //   NOT NULL" or the operation is "IS NULL" or "IS NOT NULL" and value is
311   //   different than NULL.
312   virtual SearchValidationResult ValidateSearchConstraints(FilterOp,
313                                                            SqlValue) const = 0;
314 
315   // Post-validated implementation of |Search|. See |Search|'s documentation.
316   virtual RangeOrBitVector SearchValidated(FilterOp, SqlValue, Range) const = 0;
317 
318   // Post-validated implementation of |IndexSearch|. See |IndexSearch|'s
319   // documentation.
320   virtual void IndexSearchValidated(FilterOp, SqlValue, Indices&) const = 0;
321 
322   // Post-validated implementation of |OrderedIndexSearch|. See
323   // |OrderedIndexSearch|'s documentation.
324   Range OrderedIndexSearchValidated(FilterOp op,
325                                     SqlValue value,
326                                     const OrderedIndices& indices) const;
327 
328   // Returns the SqlValue representing the value at a given index.
329   //
330   // This function might be very tempting to use as it appears cheap on the
331   // surface but because of how DataLayerChains might be layered on top of each
332   // other, this might require *several* virtual function calls per index.
333   // If you're tempted to use this, please consider instead create a new
334   // "vectorized" function instead and only using this as a last resort.
335   //
336   // The correct "class" of algorithms to use this function are cases where you
337   // have a set of indices you want to lookup and based on the value returned
338   // you will only use a fraction of them. In this case, it might be worth
339   // paying the non-vectorized lookup to vastly reduce how many indices need
340   // to be translated.
341   //
342   // An example of such an algorithm is binary search on indices.
343   virtual SqlValue Get_AvoidUsingBecauseSlow(uint32_t index) const = 0;
344 };
345 
346 }  // namespace perfetto::trace_processor::column
347 
348 #endif  // SRC_TRACE_PROCESSOR_DB_COLUMN_DATA_LAYER_H_
349