• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef SRC_TRACE_PROCESSOR_PERFETTO_SQL_INTRINSICS_OPERATORS_SPAN_JOIN_OPERATOR_H_
18 #define SRC_TRACE_PROCESSOR_PERFETTO_SQL_INTRINSICS_OPERATORS_SPAN_JOIN_OPERATOR_H_
19 
20 #include <cstddef>
21 #include <cstdint>
22 #include <limits>
23 #include <optional>
24 #include <string>
25 #include <utility>
26 #include <vector>
27 
28 #include "perfetto/base/logging.h"
29 #include "perfetto/base/status.h"
30 #include "perfetto/ext/base/flat_hash_map.h"
31 #include "perfetto/ext/base/string_splitter.h"
32 #include "perfetto/ext/base/string_utils.h"
33 #include "perfetto/trace_processor/basic_types.h"
34 #include "src/trace_processor/sqlite/bindings/sqlite_module.h"
35 #include "src/trace_processor/sqlite/module_lifecycle_manager.h"
36 #include "src/trace_processor/sqlite/sqlite_engine.h"
37 
38 namespace perfetto::trace_processor {
39 
40 class PerfettoSqlEngine;
41 struct SpanJoinOperatorModule;
42 
43 // Implements the SPAN JOIN operation between two tables on a particular column.
44 //
45 // Span:
46 // A span is a row with a timestamp and a duration. It is used to model
47 // operations which run for a particular *span* of time.
48 //
49 // We draw spans like so (time on the x-axis):
50 // start of span->[ time where opertion is running ]<- end of span
51 //
52 // Multiple spans can happen in parallel:
53 // [      ]
54 //    [        ]
55 //   [                    ]
56 //  [ ]
57 //
58 // The above for example, models scheduling activity on a 4-core computer for a
59 // short period of time.
60 //
61 // Span join:
62 // The span join operation can be thought of as the intersection of span tables.
63 // That is, the join table has a span for each pair of spans in the child tables
64 // where the spans overlap. Because many spans are possible in parallel, an
65 // extra metadata column (labelled the "join column") is used to distinguish
66 // between the spanned tables.
67 //
68 // For a given join key suppose these were the two span tables:
69 // Table 1:   [        ]              [      ]         [ ]
70 // Table 2:          [      ]            [  ]           [      ]
71 // Output :          [ ]                 [  ]           []
72 //
73 // All other columns apart from timestamp (ts), duration (dur) and the join key
74 // are passed through unchanged.
75 struct SpanJoinOperatorModule : public sqlite::Module<SpanJoinOperatorModule> {
76  public:
77   static constexpr uint32_t kSourceGeqOpCode =
78       SQLITE_INDEX_CONSTRAINT_FUNCTION + 1;
79 
80   struct State;
81 
82   // Enum indicating whether the queries on the two inner tables should
83   // emit shadows.
84   enum class EmitShadowType {
85     // Used when the table should emit all shadow slices (both present and
86     // missing partition shadows).
87     kAll,
88 
89     // Used when the table should only emit shadows for partitions which are
90     // present.
91     kPresentPartitionOnly,
92 
93     // Used when the table should emit no shadow slices.
94     kNone,
95   };
96 
97   // Parsed version of a table descriptor.
98   struct TableDescriptor {
99     static base::Status Parse(const std::string& raw_descriptor,
100                               TableDescriptor* descriptor);
101 
IsPartitionedSpanJoinOperatorModule::TableDescriptor102     bool IsPartitioned() const { return !partition_col.empty(); }
103 
104     std::string name;
105     std::string partition_col;
106   };
107 
108   // Contains the definition of the child tables.
109   class TableDefinition {
110    public:
111     TableDefinition() = default;
112 
113     TableDefinition(std::string name,
114                     std::string partition_col,
115                     std::vector<std::pair<SqlValue::Type, std::string>> cols,
116                     EmitShadowType emit_shadow_type,
117                     uint32_t ts_idx,
118                     std::optional<uint32_t> dur_idx,
119                     uint32_t partition_idx);
120 
121     static base::Status Create(PerfettoSqlEngine* engine,
122                                const TableDescriptor& desc,
123                                EmitShadowType emit_shadow_type,
124                                TableDefinition* defn);
125 
126     // Creates an SQL query from the constraints and index,
127     std::string CreateSqlQuery(base::StringSplitter&,
128                                sqlite3_value** argv) const;
129 
130     // Creates the section of the "CREATE TABLE" corresponding to this
131     // definition.
132     std::string CreateVtabCreateTableSection() const;
133 
134     // Returns whether this table should emit present partition shadow slices.
ShouldEmitPresentPartitionShadowSpanJoinOperatorModule135     bool ShouldEmitPresentPartitionShadow() const {
136       return emit_shadow_type_ == EmitShadowType::kAll ||
137              emit_shadow_type_ == EmitShadowType::kPresentPartitionOnly;
138     }
139 
140     // Returns whether this table should emit missing partition shadow slices.
ShouldEmitMissingPartitionShadowSpanJoinOperatorModule141     bool ShouldEmitMissingPartitionShadow() const {
142       return emit_shadow_type_ == EmitShadowType::kAll;
143     }
144 
145     // Returns whether the table is partitioned.
IsPartitionedSpanJoinOperatorModule146     bool IsPartitioned() const { return !partition_col_.empty(); }
147 
nameSpanJoinOperatorModule148     const std::string& name() const { return name_; }
partition_colSpanJoinOperatorModule149     const std::string& partition_col() const { return partition_col_; }
columnsSpanJoinOperatorModule150     const std::vector<std::pair<SqlValue::Type, std::string>>& columns() const {
151       return cols_;
152     }
153 
ts_idxSpanJoinOperatorModule154     uint32_t ts_idx() const { return ts_idx_; }
dur_idxSpanJoinOperatorModule155     std::optional<uint32_t> dur_idx() const { return dur_idx_; }
partition_idxSpanJoinOperatorModule156     uint32_t partition_idx() const { return partition_idx_; }
157 
158    private:
159     EmitShadowType emit_shadow_type_ = EmitShadowType::kNone;
160 
161     std::string name_;
162     std::string partition_col_;
163     std::vector<std::pair<SqlValue::Type, std::string>> cols_;
164 
165     uint32_t ts_idx_ = std::numeric_limits<uint32_t>::max();
166     std::optional<uint32_t> dur_idx_;
167     uint32_t partition_idx_ = std::numeric_limits<uint32_t>::max();
168   };
169 
170   // Stores information about a single subquery into one of the two child
171   // tables.
172   //
173   // This class is implemented as a state machine which steps from one slice to
174   // the next.
175   class Query {
176    public:
177     // Enum encoding the current state of the query in the state machine.
178     enum class State {
179       // Encodes that the current slice is a real slice (i.e. comes directly
180       // from the cursor).
181       kReal,
182 
183       // Encodes that the current slice is on a partition for which there is a
184       // real slice present.
185       kPresentPartitionShadow,
186 
187       // Encodes that the current slice is on a paritition(s) for which there is
188       // no real slice for those partition(s).
189       kMissingPartitionShadow,
190 
191       // Encodes that this query has reached the end.
192       kEof,
193     };
194 
195     Query(SpanJoinOperatorModule::State*, const TableDefinition*);
196     virtual ~Query();
197 
198     Query(Query&&) noexcept = default;
199     Query& operator=(Query&&) = default;
200 
201     enum class InitialEofBehavior {
202       kTreatAsEof,
203       kTreatAsMissingPartitionShadow
204     };
205 
206     // Initializes the query with the given constraints and query parameters.
207     base::Status Initialize(
208         std::string sql,
209         InitialEofBehavior eof_behavior = InitialEofBehavior::kTreatAsEof);
210 
211     // Forwards the query to the next valid slice.
212     base::Status Next();
213 
214     // Rewinds the query to the first valid slice
215     // This is used in the mixed partitioning case where the query with no
216     // partitions is rewound to the start on every new partition.
217     base::Status Rewind();
218 
219     // Reports the column at the given index to given context.
220     void ReportSqliteResult(sqlite3_context* context, size_t index);
221 
222     // Returns whether the cursor has reached eof.
IsEofSpanJoinOperatorModule223     bool IsEof() const { return state_ == State::kEof; }
224 
225     // Returns whether the current slice pointed to is a real slice.
IsRealSpanJoinOperatorModule226     bool IsReal() const { return state_ == State::kReal; }
227 
228     // Returns the first partition this slice covers (for real/single partition
229     // shadows, this is the same as partition()).
230     // This partition encodes a [start, end] (closed at start and at end) range
231     // of partitions which works as the partitions are integers.
FirstPartitionSpanJoinOperatorModule232     int64_t FirstPartition() const {
233       PERFETTO_DCHECK(!IsEof());
234       return IsMissingPartitionShadow() ? missing_partition_start_
235                                         : partition();
236     }
237 
238     // Returns the last partition this slice covers (for real/single partition
239     // shadows, this is the same as partition()).
240     // This partition encodes a [start, end] (closed at start and at end) range
241     // of partitions which works as the partitions are integers.
LastPartitionSpanJoinOperatorModule242     int64_t LastPartition() const {
243       PERFETTO_DCHECK(!IsEof());
244       return IsMissingPartitionShadow() ? missing_partition_end_ - 1
245                                         : partition();
246     }
247 
248     // Returns the end timestamp of this slice adjusted to ensure that -1
249     // duration slices always returns ts.
AdjustedTsEndSpanJoinOperatorModule250     int64_t AdjustedTsEnd() const {
251       PERFETTO_DCHECK(!IsEof());
252       return ts_end_ - ts() == -1 ? ts() : ts_end_;
253     }
254 
tsSpanJoinOperatorModule255     int64_t ts() const {
256       PERFETTO_DCHECK(!IsEof());
257       return ts_;
258     }
partitionSpanJoinOperatorModule259     int64_t partition() const {
260       PERFETTO_DCHECK(!IsEof() && defn_->IsPartitioned());
261       return partition_;
262     }
263 
raw_ts_endSpanJoinOperatorModule264     int64_t raw_ts_end() const {
265       PERFETTO_DCHECK(!IsEof());
266       return ts_end_;
267     }
268 
definitionSpanJoinOperatorModule269     const TableDefinition* definition() const { return defn_; }
270 
271    private:
272     Query(Query&) = delete;
273     Query& operator=(const Query&) = delete;
274 
275     // Returns whether the current slice pointed to is a valid slice.
276     bool IsValidSlice();
277 
278     // Forwards the query to the next valid slice.
279     base::Status FindNextValidSlice();
280 
281     // Advances the query state machine by one slice.
282     base::Status NextSliceState();
283 
284     // Forwards the cursor to point to the next real slice.
285     base::Status CursorNext();
286 
287     // Returns whether the current slice pointed to is a present partition
288     // shadow.
IsPresentPartitionShadowSpanJoinOperatorModule289     bool IsPresentPartitionShadow() const {
290       return state_ == State::kPresentPartitionShadow;
291     }
292 
293     // Returns whether the current slice pointed to is a missing partition
294     // shadow.
IsMissingPartitionShadowSpanJoinOperatorModule295     bool IsMissingPartitionShadow() const {
296       return state_ == State::kMissingPartitionShadow;
297     }
298 
299     // Returns whether the current slice pointed to is an empty shadow.
IsEmptyShadowSpanJoinOperatorModule300     bool IsEmptyShadow() const {
301       PERFETTO_DCHECK(!IsEof());
302       return (!IsReal() && ts_ == ts_end_) ||
303              (IsMissingPartitionShadow() &&
304               missing_partition_start_ == missing_partition_end_);
305     }
306 
CursorTsSpanJoinOperatorModule307     int64_t CursorTs() const {
308       PERFETTO_DCHECK(!cursor_eof_);
309       auto ts_idx = static_cast<int>(defn_->ts_idx());
310       return sqlite3_column_int64(stmt_->sqlite_stmt(), ts_idx);
311     }
312 
CursorDurSpanJoinOperatorModule313     int64_t CursorDur() const {
314       PERFETTO_DCHECK(!cursor_eof_);
315       if (!defn_->dur_idx().has_value()) {
316         return 0;
317       }
318       auto dur_idx = static_cast<int>(defn_->dur_idx().value());
319       return sqlite3_column_int64(stmt_->sqlite_stmt(), dur_idx);
320     }
321 
CursorPartitionSpanJoinOperatorModule322     int64_t CursorPartition() const {
323       PERFETTO_DCHECK(!cursor_eof_);
324       PERFETTO_DCHECK(defn_->IsPartitioned());
325       auto partition_idx = static_cast<int>(defn_->partition_idx());
326       return sqlite3_column_int64(stmt_->sqlite_stmt(), partition_idx);
327     }
328 
329     State state_ = State::kMissingPartitionShadow;
330     bool cursor_eof_ = false;
331 
332     // Only valid when |state_| != kEof.
333     int64_t ts_ = 0;
334     int64_t ts_end_ = std::numeric_limits<int64_t>::max();
335 
336     // Only valid when |state_| == kReal or |state_| == kPresentPartitionShadow.
337     int64_t partition_ = std::numeric_limits<int64_t>::min();
338 
339     // Only valid when |state_| == kMissingPartitionShadow.
340     int64_t missing_partition_start_ = 0;
341     int64_t missing_partition_end_ = 0;
342 
343     std::string sql_query_;
344     std::optional<SqliteEngine::PreparedStatement> stmt_;
345 
346     const TableDefinition* defn_ = nullptr;
347     SpanJoinOperatorModule::State* in_state_ = nullptr;
348   };
349 
350   // Columns of the span operator table.
351   enum Column {
352     kTimestamp = 0,
353     kDuration = 1,
354     kPartition = 2,
355     // All other columns are dynamic depending on the joined tables.
356   };
357 
358   // Enum indicating the possible partitionings of the two tables in span join.
359   enum class PartitioningType {
360     // Used when both tables don't have a partition specified.
361     kNoPartitioning = 0,
362 
363     // Used when both tables have the same partition specified.
364     kSamePartitioning = 1,
365 
366     // Used when one table has a partition and the other table doesn't.
367     kMixedPartitioning = 2
368   };
369 
370   // Identifier for a column by index in a given table.
371   struct ColumnLocator {
372     const TableDefinition* defn;
373     size_t col_index;
374   };
375 
376   struct Context {
ContextSpanJoinOperatorModule::Context377     explicit Context(PerfettoSqlEngine* _engine) : engine(_engine) {}
378 
379     PerfettoSqlEngine* engine;
380     sqlite::ModuleStateManager<SpanJoinOperatorModule> manager;
381   };
382   struct State {
IsLeftJoinSpanJoinOperatorModule::State383     bool IsLeftJoin() const {
384       return base::CaseInsensitiveEqual(module_name, "span_left_join");
385     }
IsOuterJoinSpanJoinOperatorModule::State386     bool IsOuterJoin() const {
387       return base::CaseInsensitiveEqual(module_name, "span_outer_join");
388     }
389 
partition_colSpanJoinOperatorModule::State390     const std::string& partition_col() const {
391       return t1_defn.IsPartitioned() ? t1_defn.partition_col()
392                                      : t2_defn.partition_col();
393     }
394 
395     std::string GetNameForGlobalColumnIndex(const TableDefinition& defn,
396                                             int global_column);
397 
398     std::string BestIndexStrForDefinition(const sqlite3_index_info* info,
399                                           const TableDefinition& defn);
400 
401     void PopulateColumnLocatorMap(uint32_t);
402 
403     PerfettoSqlEngine* engine;
404     std::string module_name;
405     std::string create_table_stmt;
406     TableDefinition t1_defn;
407     TableDefinition t2_defn;
408     PartitioningType partitioning;
409     base::FlatHashMap<size_t, ColumnLocator> global_index_to_column_locator;
410   };
411   struct Vtab : public sqlite3_vtab {
412     sqlite::ModuleStateManager<SpanJoinOperatorModule>::PerVtabState* state;
413   };
414 
415   // Base class for a cursor on the span table.
416   struct Cursor final : public sqlite3_vtab_cursor {
CursorSpanJoinOperatorModule::final417     explicit Cursor(State* _state)
418         : t1(_state, &_state->t1_defn),
419           t2(_state, &_state->t2_defn),
420           state(_state) {}
421 
422     bool IsOverlappingSpan() const;
423     base::Status FindOverlappingSpan();
424     Query* FindEarliestFinishQuery();
425 
426     Query t1;
427     Query t2;
428 
429     Query* next_query = nullptr;
430 
431     // Only valid for kMixedPartition.
432     int64_t last_mixed_partition_ = std::numeric_limits<int64_t>::min();
433 
434     State* state;
435   };
436 
437   static constexpr bool kSupportsWrites = false;
438 
439   static int Create(sqlite3*,
440                     void*,
441                     int,
442                     const char* const*,
443                     sqlite3_vtab**,
444                     char**);
445   static int Destroy(sqlite3_vtab*);
446 
447   static int Connect(sqlite3*,
448                      void*,
449                      int,
450                      const char* const*,
451                      sqlite3_vtab**,
452                      char**);
453   static int Disconnect(sqlite3_vtab*);
454 
455   static int BestIndex(sqlite3_vtab*, sqlite3_index_info*);
456 
457   static int Open(sqlite3_vtab*, sqlite3_vtab_cursor**);
458   static int Close(sqlite3_vtab_cursor*);
459 
460   static int Filter(sqlite3_vtab_cursor*,
461                     int,
462                     const char*,
463                     int,
464                     sqlite3_value**);
465   static int Next(sqlite3_vtab_cursor*);
466   static int Eof(sqlite3_vtab_cursor*);
467   static int Column(sqlite3_vtab_cursor*, sqlite3_context*, int);
468   static int Rowid(sqlite3_vtab_cursor*, sqlite_int64*);
469 
470   static int FindFunction(sqlite3_vtab*,
471                           int,
472                           const char*,
473                           FindFunctionFn**,
474                           void**);
475 
476   // This needs to happen at the end as it depends on the functions
477   // defined above.
478   static constexpr sqlite3_module kModule = CreateModule();
479 };
480 
481 }  // namespace perfetto::trace_processor
482 
483 #endif  // SRC_TRACE_PROCESSOR_PERFETTO_SQL_INTRINSICS_OPERATORS_SPAN_JOIN_OPERATOR_H_
484