• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef SRC_TRACE_PROCESSOR_PRELUDE_OPERATORS_SPAN_JOIN_OPERATOR_H_
18 #define SRC_TRACE_PROCESSOR_PRELUDE_OPERATORS_SPAN_JOIN_OPERATOR_H_
19 
20 #include <sqlite3.h>
21 
22 #include <array>
23 #include <deque>
24 #include <limits>
25 #include <map>
26 #include <memory>
27 #include <string>
28 #include <vector>
29 
30 #include "perfetto/ext/base/flat_hash_map.h"
31 #include "perfetto/ext/base/string_utils.h"
32 #include "perfetto/trace_processor/basic_types.h"
33 #include "perfetto/trace_processor/status.h"
34 #include "src/trace_processor/sqlite/scoped_db.h"
35 #include "src/trace_processor/sqlite/sqlite_table.h"
36 
37 namespace perfetto {
38 namespace trace_processor {
39 
40 // Implements the SPAN JOIN operation between two tables on a particular column.
41 //
42 // Span:
43 // A span is a row with a timestamp and a duration. It is used to model
44 // operations which run for a particular *span* of time.
45 //
46 // We draw spans like so (time on the x-axis):
47 // start of span->[ time where opertion is running ]<- end of span
48 //
49 // Multiple spans can happen in parallel:
50 // [      ]
51 //    [        ]
52 //   [                    ]
53 //  [ ]
54 //
55 // The above for example, models scheduling activity on a 4-core computer for a
56 // short period of time.
57 //
58 // Span join:
59 // The span join operation can be thought of as the intersection of span tables.
60 // That is, the join table has a span for each pair of spans in the child tables
61 // where the spans overlap. Because many spans are possible in parallel, an
62 // extra metadata column (labelled the "join column") is used to distinguish
63 // between the spanned tables.
64 //
65 // For a given join key suppose these were the two span tables:
66 // Table 1:   [        ]              [      ]         [ ]
67 // Table 2:          [      ]            [  ]           [      ]
68 // Output :          [ ]                 [  ]           []
69 //
70 // All other columns apart from timestamp (ts), duration (dur) and the join key
71 // are passed through unchanged.
72 class SpanJoinOperatorTable final
73     : public TypedSqliteTable<SpanJoinOperatorTable, const void*> {
74  public:
75   // Enum indicating whether the queries on the two inner tables should
76   // emit shadows.
77   enum class EmitShadowType {
78     // Used when the table should emit all shadow slices (both present and
79     // missing partition shadows).
80     kAll,
81 
82     // Used when the table should only emit shadows for partitions which are
83     // present.
84     kPresentPartitionOnly,
85 
86     // Used when the table should emit no shadow slices.
87     kNone,
88   };
89 
90   // Contains the definition of the child tables.
91   class TableDefinition {
92    public:
93     TableDefinition() = default;
94 
95     TableDefinition(std::string name,
96                     std::string partition_col,
97                     std::vector<SqliteTable::Column> cols,
98                     EmitShadowType emit_shadow_type,
99                     uint32_t ts_idx,
100                     uint32_t dur_idx,
101                     uint32_t partition_idx);
102 
103     // Returns whether this table should emit present partition shadow slices.
ShouldEmitPresentPartitionShadow()104     bool ShouldEmitPresentPartitionShadow() const {
105       return emit_shadow_type_ == EmitShadowType::kAll ||
106              emit_shadow_type_ == EmitShadowType::kPresentPartitionOnly;
107     }
108 
109     // Returns whether this table should emit missing partition shadow slices.
ShouldEmitMissingPartitionShadow()110     bool ShouldEmitMissingPartitionShadow() const {
111       return emit_shadow_type_ == EmitShadowType::kAll;
112     }
113 
114     // Returns whether the table is partitioned.
IsPartitioned()115     bool IsPartitioned() const { return !partition_col_.empty(); }
116 
name()117     const std::string& name() const { return name_; }
partition_col()118     const std::string& partition_col() const { return partition_col_; }
columns()119     const std::vector<SqliteTable::Column>& columns() const { return cols_; }
120 
ts_idx()121     uint32_t ts_idx() const { return ts_idx_; }
dur_idx()122     uint32_t dur_idx() const { return dur_idx_; }
partition_idx()123     uint32_t partition_idx() const { return partition_idx_; }
124 
125    private:
126     EmitShadowType emit_shadow_type_ = EmitShadowType::kNone;
127 
128     std::string name_;
129     std::string partition_col_;
130     std::vector<SqliteTable::Column> cols_;
131 
132     uint32_t ts_idx_ = std::numeric_limits<uint32_t>::max();
133     uint32_t dur_idx_ = std::numeric_limits<uint32_t>::max();
134     uint32_t partition_idx_ = std::numeric_limits<uint32_t>::max();
135   };
136 
137   // Stores information about a single subquery into one of the two child
138   // tables.
139   //
140   // This class is implemented as a state machine which steps from one slice to
141   // the next.
142   class Query {
143    public:
144     // Enum encoding the current state of the query in the state machine.
145     enum class State {
146       // Encodes that the current slice is a real slice (i.e. comes directly
147       // from the cursor).
148       kReal,
149 
150       // Encodes that the current slice is on a partition for which there is a
151       // real slice present.
152       kPresentPartitionShadow,
153 
154       // Encodes that the current slice is on a paritition(s) for which there is
155       // no real slice for those partition(s).
156       kMissingPartitionShadow,
157 
158       // Encodes that this query has reached the end.
159       kEof,
160     };
161 
162     Query(SpanJoinOperatorTable*, const TableDefinition*, sqlite3* db);
163     virtual ~Query();
164 
165     Query(Query&&) noexcept = default;
166     Query& operator=(Query&&) = default;
167 
168     enum class InitialEofBehavior {
169       kTreatAsEof,
170       kTreatAsMissingPartitionShadow
171     };
172 
173     // Initializes the query with the given constraints and query parameters.
174     util::Status Initialize(
175         const QueryConstraints& qc,
176         sqlite3_value** argv,
177         InitialEofBehavior eof_behavior = InitialEofBehavior::kTreatAsEof);
178 
179     // Forwards the query to the next valid slice.
180     util::Status Next();
181 
182     // Rewinds the query to the first valid slice
183     // This is used in the mixed partitioning case where the query with no
184     // partitions is rewound to the start on every new partition.
185     util::Status Rewind();
186 
187     // Reports the column at the given index to given context.
188     void ReportSqliteResult(sqlite3_context* context, size_t index);
189 
190     // Returns whether the cursor has reached eof.
IsEof()191     bool IsEof() const { return state_ == State::kEof; }
192 
193     // Returns whether the current slice pointed to is a real slice.
IsReal()194     bool IsReal() const { return state_ == State::kReal; }
195 
196     // Returns the first partition this slice covers (for real/single partition
197     // shadows, this is the same as partition()).
198     // This partition encodes a [start, end] (closed at start and at end) range
199     // of partitions which works as the partitions are integers.
FirstPartition()200     int64_t FirstPartition() const {
201       PERFETTO_DCHECK(!IsEof());
202       return IsMissingPartitionShadow() ? missing_partition_start_
203                                         : partition();
204     }
205 
206     // Returns the last partition this slice covers (for real/single partition
207     // shadows, this is the same as partition()).
208     // This partition encodes a [start, end] (closed at start and at end) range
209     // of partitions which works as the partitions are integers.
LastPartition()210     int64_t LastPartition() const {
211       PERFETTO_DCHECK(!IsEof());
212       return IsMissingPartitionShadow() ? missing_partition_end_ - 1
213                                         : partition();
214     }
215 
216     // Returns the end timestamp of this slice adjusted to ensure that -1
217     // duration slices always returns ts.
AdjustedTsEnd()218     int64_t AdjustedTsEnd() const {
219       PERFETTO_DCHECK(!IsEof());
220       return ts_end_ - ts() == -1 ? ts() : ts_end_;
221     }
222 
ts()223     int64_t ts() const {
224       PERFETTO_DCHECK(!IsEof());
225       return ts_;
226     }
partition()227     int64_t partition() const {
228       PERFETTO_DCHECK(!IsEof() && defn_->IsPartitioned());
229       return partition_;
230     }
231 
raw_ts_end()232     int64_t raw_ts_end() const {
233       PERFETTO_DCHECK(!IsEof());
234       return ts_end_;
235     }
236 
definition()237     const TableDefinition* definition() const { return defn_; }
238 
239    private:
240     Query(Query&) = delete;
241     Query& operator=(const Query&) = delete;
242 
243     // Returns whether the current slice pointed to is a valid slice.
244     bool IsValidSlice();
245 
246     // Forwards the query to the next valid slice.
247     util::Status FindNextValidSlice();
248 
249     // Advances the query state machine by one slice.
250     util::Status NextSliceState();
251 
252     // Forwards the cursor to point to the next real slice.
253     util::Status CursorNext();
254 
255     // Creates an SQL query from the given set of constraint strings.
256     std::string CreateSqlQuery(const std::vector<std::string>& cs) const;
257 
258     // Returns whether the current slice pointed to is a present partition
259     // shadow.
IsPresentPartitionShadow()260     bool IsPresentPartitionShadow() const {
261       return state_ == State::kPresentPartitionShadow;
262     }
263 
264     // Returns whether the current slice pointed to is a missing partition
265     // shadow.
IsMissingPartitionShadow()266     bool IsMissingPartitionShadow() const {
267       return state_ == State::kMissingPartitionShadow;
268     }
269 
270     // Returns whether the current slice pointed to is an empty shadow.
IsEmptyShadow()271     bool IsEmptyShadow() const {
272       PERFETTO_DCHECK(!IsEof());
273       return (!IsReal() && ts_ == ts_end_) ||
274              (IsMissingPartitionShadow() &&
275               missing_partition_start_ == missing_partition_end_);
276     }
277 
CursorTs()278     int64_t CursorTs() const {
279       PERFETTO_DCHECK(!cursor_eof_);
280       auto ts_idx = static_cast<int>(defn_->ts_idx());
281       return sqlite3_column_int64(stmt_.get(), ts_idx);
282     }
283 
CursorDur()284     int64_t CursorDur() const {
285       PERFETTO_DCHECK(!cursor_eof_);
286       auto dur_idx = static_cast<int>(defn_->dur_idx());
287       return sqlite3_column_int64(stmt_.get(), dur_idx);
288     }
289 
CursorPartition()290     int64_t CursorPartition() const {
291       PERFETTO_DCHECK(!cursor_eof_);
292       PERFETTO_DCHECK(defn_->IsPartitioned());
293       auto partition_idx = static_cast<int>(defn_->partition_idx());
294       return sqlite3_column_int64(stmt_.get(), partition_idx);
295     }
296 
297     State state_ = State::kMissingPartitionShadow;
298     bool cursor_eof_ = false;
299 
300     // Only valid when |state_| != kEof.
301     int64_t ts_ = 0;
302     int64_t ts_end_ = std::numeric_limits<int64_t>::max();
303 
304     // Only valid when |state_| == kReal or |state_| == kPresentPartitionShadow.
305     int64_t partition_ = std::numeric_limits<int64_t>::min();
306 
307     // Only valid when |state_| == kMissingPartitionShadow.
308     int64_t missing_partition_start_ = 0;
309     int64_t missing_partition_end_ = 0;
310 
311     std::string sql_query_;
312     ScopedStmt stmt_;
313 
314     const TableDefinition* defn_ = nullptr;
315     sqlite3* db_ = nullptr;
316     SpanJoinOperatorTable* table_ = nullptr;
317   };
318 
319   // Base class for a cursor on the span table.
320   class Cursor final : public SqliteTable::BaseCursor {
321    public:
322     Cursor(SpanJoinOperatorTable*, sqlite3* db);
323     ~Cursor() final;
324 
325     base::Status Filter(const QueryConstraints& qc,
326                         sqlite3_value** argv,
327                         FilterHistory);
328     base::Status Next();
329     base::Status Column(sqlite3_context* context, int N);
330     bool Eof();
331 
332    private:
333     Cursor(Cursor&) = delete;
334     Cursor& operator=(const Cursor&) = delete;
335 
336     Cursor(Cursor&&) noexcept = default;
337     Cursor& operator=(Cursor&&) = default;
338 
339     bool IsOverlappingSpan();
340     util::Status FindOverlappingSpan();
341     Query* FindEarliestFinishQuery();
342 
343     Query t1_;
344     Query t2_;
345 
346     Query* next_query_ = nullptr;
347 
348     // Only valid for kMixedPartition.
349     int64_t last_mixed_partition_ = std::numeric_limits<int64_t>::min();
350 
351     SpanJoinOperatorTable* table_;
352   };
353 
354   SpanJoinOperatorTable(sqlite3*, const void*);
355   ~SpanJoinOperatorTable() final;
356 
357   // Table implementation.
358   util::Status Init(int, const char* const*, SqliteTable::Schema*) final;
359   std::unique_ptr<SqliteTable::BaseCursor> CreateCursor() final;
360   int BestIndex(const QueryConstraints& qc, BestIndexInfo* info) final;
361   int FindFunction(const char* name, FindFunctionFn* fn, void** args) final;
362 
363  private:
364   // Columns of the span operator table.
365   enum Column {
366     kTimestamp = 0,
367     kDuration = 1,
368     kPartition = 2,
369     // All other columns are dynamic depending on the joined tables.
370   };
371 
372   // Enum indicating the possible partitionings of the two tables in span join.
373   enum class PartitioningType {
374     // Used when both tables don't have a partition specified.
375     kNoPartitioning = 0,
376 
377     // Used when both tables have the same partition specified.
378     kSamePartitioning = 1,
379 
380     // Used when one table has a partition and the other table doesn't.
381     kMixedPartitioning = 2
382   };
383 
384   // Parsed version of a table descriptor.
385   struct TableDescriptor {
386     static util::Status Parse(const std::string& raw_descriptor,
387                               TableDescriptor* descriptor);
388 
IsPartitionedTableDescriptor389     bool IsPartitioned() const { return !partition_col.empty(); }
390 
391     std::string name;
392     std::string partition_col;
393   };
394 
395   // Identifier for a column by index in a given table.
396   struct ColumnLocator {
397     const TableDefinition* defn;
398     size_t col_index;
399   };
400 
IsLeftJoin()401   bool IsLeftJoin() const {
402     return base::CaseInsensitiveEqual(module_name(), "span_left_join");
403   }
IsOuterJoin()404   bool IsOuterJoin() const {
405     return base::CaseInsensitiveEqual(module_name(), "span_outer_join");
406   }
407 
partition_col()408   const std::string& partition_col() const {
409     return t1_defn_.IsPartitioned() ? t1_defn_.partition_col()
410                                     : t2_defn_.partition_col();
411   }
412 
413   util::Status CreateTableDefinition(
414       const TableDescriptor& desc,
415       EmitShadowType emit_shadow_type,
416       SpanJoinOperatorTable::TableDefinition* defn);
417 
418   std::vector<std::string> ComputeSqlConstraintsForDefinition(
419       const TableDefinition& defn,
420       const QueryConstraints& qc,
421       sqlite3_value** argv);
422 
423   std::string GetNameForGlobalColumnIndex(const TableDefinition& defn,
424                                           int global_column);
425 
426   void CreateSchemaColsForDefn(const TableDefinition& defn,
427                                std::vector<SqliteTable::Column>* cols);
428 
429   TableDefinition t1_defn_;
430   TableDefinition t2_defn_;
431   PartitioningType partitioning_;
432   base::FlatHashMap<size_t, ColumnLocator> global_index_to_column_locator_;
433 
434   sqlite3* const db_;
435 };
436 
437 }  // namespace trace_processor
438 }  // namespace perfetto
439 
440 #endif  // SRC_TRACE_PROCESSOR_PRELUDE_OPERATORS_SPAN_JOIN_OPERATOR_H_
441