1 /* 2 * Copyright (C) 2018 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef SRC_TRACE_PROCESSOR_PERFETTO_SQL_INTRINSICS_OPERATORS_SPAN_JOIN_OPERATOR_H_ 18 #define SRC_TRACE_PROCESSOR_PERFETTO_SQL_INTRINSICS_OPERATORS_SPAN_JOIN_OPERATOR_H_ 19 20 #include <cstddef> 21 #include <cstdint> 22 #include <limits> 23 #include <optional> 24 #include <string> 25 #include <utility> 26 #include <vector> 27 28 #include "perfetto/base/logging.h" 29 #include "perfetto/base/status.h" 30 #include "perfetto/ext/base/flat_hash_map.h" 31 #include "perfetto/ext/base/string_splitter.h" 32 #include "perfetto/ext/base/string_utils.h" 33 #include "perfetto/trace_processor/basic_types.h" 34 #include "src/trace_processor/sqlite/bindings/sqlite_module.h" 35 #include "src/trace_processor/sqlite/module_lifecycle_manager.h" 36 #include "src/trace_processor/sqlite/sqlite_engine.h" 37 38 namespace perfetto::trace_processor { 39 40 class PerfettoSqlEngine; 41 struct SpanJoinOperatorModule; 42 43 // Implements the SPAN JOIN operation between two tables on a particular column. 44 // 45 // Span: 46 // A span is a row with a timestamp and a duration. It is used to model 47 // operations which run for a particular *span* of time. 48 // 49 // We draw spans like so (time on the x-axis): 50 // start of span->[ time where opertion is running ]<- end of span 51 // 52 // Multiple spans can happen in parallel: 53 // [ ] 54 // [ ] 55 // [ ] 56 // [ ] 57 // 58 // The above for example, models scheduling activity on a 4-core computer for a 59 // short period of time. 60 // 61 // Span join: 62 // The span join operation can be thought of as the intersection of span tables. 63 // That is, the join table has a span for each pair of spans in the child tables 64 // where the spans overlap. Because many spans are possible in parallel, an 65 // extra metadata column (labelled the "join column") is used to distinguish 66 // between the spanned tables. 67 // 68 // For a given join key suppose these were the two span tables: 69 // Table 1: [ ] [ ] [ ] 70 // Table 2: [ ] [ ] [ ] 71 // Output : [ ] [ ] [] 72 // 73 // All other columns apart from timestamp (ts), duration (dur) and the join key 74 // are passed through unchanged. 75 struct SpanJoinOperatorModule : public sqlite::Module<SpanJoinOperatorModule> { 76 public: 77 static constexpr uint32_t kSourceGeqOpCode = 78 SQLITE_INDEX_CONSTRAINT_FUNCTION + 1; 79 80 struct State; 81 82 // Enum indicating whether the queries on the two inner tables should 83 // emit shadows. 84 enum class EmitShadowType { 85 // Used when the table should emit all shadow slices (both present and 86 // missing partition shadows). 87 kAll, 88 89 // Used when the table should only emit shadows for partitions which are 90 // present. 91 kPresentPartitionOnly, 92 93 // Used when the table should emit no shadow slices. 94 kNone, 95 }; 96 97 // Parsed version of a table descriptor. 98 struct TableDescriptor { 99 static base::Status Parse(const std::string& raw_descriptor, 100 TableDescriptor* descriptor); 101 IsPartitionedSpanJoinOperatorModule::TableDescriptor102 bool IsPartitioned() const { return !partition_col.empty(); } 103 104 std::string name; 105 std::string partition_col; 106 }; 107 108 // Contains the definition of the child tables. 109 class TableDefinition { 110 public: 111 TableDefinition() = default; 112 113 TableDefinition(std::string name, 114 std::string partition_col, 115 std::vector<std::pair<SqlValue::Type, std::string>> cols, 116 EmitShadowType emit_shadow_type, 117 uint32_t ts_idx, 118 std::optional<uint32_t> dur_idx, 119 uint32_t partition_idx); 120 121 static base::Status Create(PerfettoSqlEngine* engine, 122 const TableDescriptor& desc, 123 EmitShadowType emit_shadow_type, 124 TableDefinition* defn); 125 126 // Creates an SQL query from the constraints and index, 127 std::string CreateSqlQuery(base::StringSplitter&, 128 sqlite3_value** argv) const; 129 130 // Creates the section of the "CREATE TABLE" corresponding to this 131 // definition. 132 std::string CreateVtabCreateTableSection() const; 133 134 // Returns whether this table should emit present partition shadow slices. ShouldEmitPresentPartitionShadowSpanJoinOperatorModule135 bool ShouldEmitPresentPartitionShadow() const { 136 return emit_shadow_type_ == EmitShadowType::kAll || 137 emit_shadow_type_ == EmitShadowType::kPresentPartitionOnly; 138 } 139 140 // Returns whether this table should emit missing partition shadow slices. ShouldEmitMissingPartitionShadowSpanJoinOperatorModule141 bool ShouldEmitMissingPartitionShadow() const { 142 return emit_shadow_type_ == EmitShadowType::kAll; 143 } 144 145 // Returns whether the table is partitioned. IsPartitionedSpanJoinOperatorModule146 bool IsPartitioned() const { return !partition_col_.empty(); } 147 nameSpanJoinOperatorModule148 const std::string& name() const { return name_; } partition_colSpanJoinOperatorModule149 const std::string& partition_col() const { return partition_col_; } columnsSpanJoinOperatorModule150 const std::vector<std::pair<SqlValue::Type, std::string>>& columns() const { 151 return cols_; 152 } 153 ts_idxSpanJoinOperatorModule154 uint32_t ts_idx() const { return ts_idx_; } dur_idxSpanJoinOperatorModule155 std::optional<uint32_t> dur_idx() const { return dur_idx_; } partition_idxSpanJoinOperatorModule156 uint32_t partition_idx() const { return partition_idx_; } 157 158 private: 159 EmitShadowType emit_shadow_type_ = EmitShadowType::kNone; 160 161 std::string name_; 162 std::string partition_col_; 163 std::vector<std::pair<SqlValue::Type, std::string>> cols_; 164 165 uint32_t ts_idx_ = std::numeric_limits<uint32_t>::max(); 166 std::optional<uint32_t> dur_idx_; 167 uint32_t partition_idx_ = std::numeric_limits<uint32_t>::max(); 168 }; 169 170 // Stores information about a single subquery into one of the two child 171 // tables. 172 // 173 // This class is implemented as a state machine which steps from one slice to 174 // the next. 175 class Query { 176 public: 177 // Enum encoding the current state of the query in the state machine. 178 enum class State { 179 // Encodes that the current slice is a real slice (i.e. comes directly 180 // from the cursor). 181 kReal, 182 183 // Encodes that the current slice is on a partition for which there is a 184 // real slice present. 185 kPresentPartitionShadow, 186 187 // Encodes that the current slice is on a paritition(s) for which there is 188 // no real slice for those partition(s). 189 kMissingPartitionShadow, 190 191 // Encodes that this query has reached the end. 192 kEof, 193 }; 194 195 Query(SpanJoinOperatorModule::State*, const TableDefinition*); 196 virtual ~Query(); 197 198 Query(Query&&) noexcept = default; 199 Query& operator=(Query&&) = default; 200 201 enum class InitialEofBehavior { 202 kTreatAsEof, 203 kTreatAsMissingPartitionShadow 204 }; 205 206 // Initializes the query with the given constraints and query parameters. 207 base::Status Initialize( 208 std::string sql, 209 InitialEofBehavior eof_behavior = InitialEofBehavior::kTreatAsEof); 210 211 // Forwards the query to the next valid slice. 212 base::Status Next(); 213 214 // Rewinds the query to the first valid slice 215 // This is used in the mixed partitioning case where the query with no 216 // partitions is rewound to the start on every new partition. 217 base::Status Rewind(); 218 219 // Reports the column at the given index to given context. 220 void ReportSqliteResult(sqlite3_context* context, size_t index); 221 222 // Returns whether the cursor has reached eof. IsEofSpanJoinOperatorModule223 bool IsEof() const { return state_ == State::kEof; } 224 225 // Returns whether the current slice pointed to is a real slice. IsRealSpanJoinOperatorModule226 bool IsReal() const { return state_ == State::kReal; } 227 228 // Returns the first partition this slice covers (for real/single partition 229 // shadows, this is the same as partition()). 230 // This partition encodes a [start, end] (closed at start and at end) range 231 // of partitions which works as the partitions are integers. FirstPartitionSpanJoinOperatorModule232 int64_t FirstPartition() const { 233 PERFETTO_DCHECK(!IsEof()); 234 return IsMissingPartitionShadow() ? missing_partition_start_ 235 : partition(); 236 } 237 238 // Returns the last partition this slice covers (for real/single partition 239 // shadows, this is the same as partition()). 240 // This partition encodes a [start, end] (closed at start and at end) range 241 // of partitions which works as the partitions are integers. LastPartitionSpanJoinOperatorModule242 int64_t LastPartition() const { 243 PERFETTO_DCHECK(!IsEof()); 244 return IsMissingPartitionShadow() ? missing_partition_end_ - 1 245 : partition(); 246 } 247 248 // Returns the end timestamp of this slice adjusted to ensure that -1 249 // duration slices always returns ts. AdjustedTsEndSpanJoinOperatorModule250 int64_t AdjustedTsEnd() const { 251 PERFETTO_DCHECK(!IsEof()); 252 return ts_end_ - ts() == -1 ? ts() : ts_end_; 253 } 254 tsSpanJoinOperatorModule255 int64_t ts() const { 256 PERFETTO_DCHECK(!IsEof()); 257 return ts_; 258 } partitionSpanJoinOperatorModule259 int64_t partition() const { 260 PERFETTO_DCHECK(!IsEof() && defn_->IsPartitioned()); 261 return partition_; 262 } 263 raw_ts_endSpanJoinOperatorModule264 int64_t raw_ts_end() const { 265 PERFETTO_DCHECK(!IsEof()); 266 return ts_end_; 267 } 268 definitionSpanJoinOperatorModule269 const TableDefinition* definition() const { return defn_; } 270 271 private: 272 Query(Query&) = delete; 273 Query& operator=(const Query&) = delete; 274 275 // Returns whether the current slice pointed to is a valid slice. 276 bool IsValidSlice(); 277 278 // Forwards the query to the next valid slice. 279 base::Status FindNextValidSlice(); 280 281 // Advances the query state machine by one slice. 282 base::Status NextSliceState(); 283 284 // Forwards the cursor to point to the next real slice. 285 base::Status CursorNext(); 286 287 // Returns whether the current slice pointed to is a present partition 288 // shadow. IsPresentPartitionShadowSpanJoinOperatorModule289 bool IsPresentPartitionShadow() const { 290 return state_ == State::kPresentPartitionShadow; 291 } 292 293 // Returns whether the current slice pointed to is a missing partition 294 // shadow. IsMissingPartitionShadowSpanJoinOperatorModule295 bool IsMissingPartitionShadow() const { 296 return state_ == State::kMissingPartitionShadow; 297 } 298 299 // Returns whether the current slice pointed to is an empty shadow. IsEmptyShadowSpanJoinOperatorModule300 bool IsEmptyShadow() const { 301 PERFETTO_DCHECK(!IsEof()); 302 return (!IsReal() && ts_ == ts_end_) || 303 (IsMissingPartitionShadow() && 304 missing_partition_start_ == missing_partition_end_); 305 } 306 CursorTsSpanJoinOperatorModule307 int64_t CursorTs() const { 308 PERFETTO_DCHECK(!cursor_eof_); 309 auto ts_idx = static_cast<int>(defn_->ts_idx()); 310 return sqlite3_column_int64(stmt_->sqlite_stmt(), ts_idx); 311 } 312 CursorDurSpanJoinOperatorModule313 int64_t CursorDur() const { 314 PERFETTO_DCHECK(!cursor_eof_); 315 if (!defn_->dur_idx().has_value()) { 316 return 0; 317 } 318 auto dur_idx = static_cast<int>(defn_->dur_idx().value()); 319 return sqlite3_column_int64(stmt_->sqlite_stmt(), dur_idx); 320 } 321 CursorPartitionSpanJoinOperatorModule322 int64_t CursorPartition() const { 323 PERFETTO_DCHECK(!cursor_eof_); 324 PERFETTO_DCHECK(defn_->IsPartitioned()); 325 auto partition_idx = static_cast<int>(defn_->partition_idx()); 326 return sqlite3_column_int64(stmt_->sqlite_stmt(), partition_idx); 327 } 328 329 State state_ = State::kMissingPartitionShadow; 330 bool cursor_eof_ = false; 331 332 // Only valid when |state_| != kEof. 333 int64_t ts_ = 0; 334 int64_t ts_end_ = std::numeric_limits<int64_t>::max(); 335 336 // Only valid when |state_| == kReal or |state_| == kPresentPartitionShadow. 337 int64_t partition_ = std::numeric_limits<int64_t>::min(); 338 339 // Only valid when |state_| == kMissingPartitionShadow. 340 int64_t missing_partition_start_ = 0; 341 int64_t missing_partition_end_ = 0; 342 343 std::string sql_query_; 344 std::optional<SqliteEngine::PreparedStatement> stmt_; 345 346 const TableDefinition* defn_ = nullptr; 347 SpanJoinOperatorModule::State* in_state_ = nullptr; 348 }; 349 350 // Columns of the span operator table. 351 enum Column { 352 kTimestamp = 0, 353 kDuration = 1, 354 kPartition = 2, 355 // All other columns are dynamic depending on the joined tables. 356 }; 357 358 // Enum indicating the possible partitionings of the two tables in span join. 359 enum class PartitioningType { 360 // Used when both tables don't have a partition specified. 361 kNoPartitioning = 0, 362 363 // Used when both tables have the same partition specified. 364 kSamePartitioning = 1, 365 366 // Used when one table has a partition and the other table doesn't. 367 kMixedPartitioning = 2 368 }; 369 370 // Identifier for a column by index in a given table. 371 struct ColumnLocator { 372 const TableDefinition* defn; 373 size_t col_index; 374 }; 375 376 struct Context { ContextSpanJoinOperatorModule::Context377 explicit Context(PerfettoSqlEngine* _engine) : engine(_engine) {} 378 379 PerfettoSqlEngine* engine; 380 sqlite::ModuleStateManager<SpanJoinOperatorModule> manager; 381 }; 382 struct State { IsLeftJoinSpanJoinOperatorModule::State383 bool IsLeftJoin() const { 384 return base::CaseInsensitiveEqual(module_name, "span_left_join"); 385 } IsOuterJoinSpanJoinOperatorModule::State386 bool IsOuterJoin() const { 387 return base::CaseInsensitiveEqual(module_name, "span_outer_join"); 388 } 389 partition_colSpanJoinOperatorModule::State390 const std::string& partition_col() const { 391 return t1_defn.IsPartitioned() ? t1_defn.partition_col() 392 : t2_defn.partition_col(); 393 } 394 395 std::string GetNameForGlobalColumnIndex(const TableDefinition& defn, 396 int global_column); 397 398 std::string BestIndexStrForDefinition(const sqlite3_index_info* info, 399 const TableDefinition& defn); 400 401 void PopulateColumnLocatorMap(uint32_t); 402 403 PerfettoSqlEngine* engine; 404 std::string module_name; 405 std::string create_table_stmt; 406 TableDefinition t1_defn; 407 TableDefinition t2_defn; 408 PartitioningType partitioning; 409 base::FlatHashMap<size_t, ColumnLocator> global_index_to_column_locator; 410 }; 411 struct Vtab : public sqlite3_vtab { 412 sqlite::ModuleStateManager<SpanJoinOperatorModule>::PerVtabState* state; 413 }; 414 415 // Base class for a cursor on the span table. 416 struct Cursor final : public sqlite3_vtab_cursor { CursorSpanJoinOperatorModule::final417 explicit Cursor(State* _state) 418 : t1(_state, &_state->t1_defn), 419 t2(_state, &_state->t2_defn), 420 state(_state) {} 421 422 bool IsOverlappingSpan() const; 423 base::Status FindOverlappingSpan(); 424 Query* FindEarliestFinishQuery(); 425 426 Query t1; 427 Query t2; 428 429 Query* next_query = nullptr; 430 431 // Only valid for kMixedPartition. 432 int64_t last_mixed_partition_ = std::numeric_limits<int64_t>::min(); 433 434 State* state; 435 }; 436 437 static constexpr bool kSupportsWrites = false; 438 439 static int Create(sqlite3*, 440 void*, 441 int, 442 const char* const*, 443 sqlite3_vtab**, 444 char**); 445 static int Destroy(sqlite3_vtab*); 446 447 static int Connect(sqlite3*, 448 void*, 449 int, 450 const char* const*, 451 sqlite3_vtab**, 452 char**); 453 static int Disconnect(sqlite3_vtab*); 454 455 static int BestIndex(sqlite3_vtab*, sqlite3_index_info*); 456 457 static int Open(sqlite3_vtab*, sqlite3_vtab_cursor**); 458 static int Close(sqlite3_vtab_cursor*); 459 460 static int Filter(sqlite3_vtab_cursor*, 461 int, 462 const char*, 463 int, 464 sqlite3_value**); 465 static int Next(sqlite3_vtab_cursor*); 466 static int Eof(sqlite3_vtab_cursor*); 467 static int Column(sqlite3_vtab_cursor*, sqlite3_context*, int); 468 static int Rowid(sqlite3_vtab_cursor*, sqlite_int64*); 469 470 static int FindFunction(sqlite3_vtab*, 471 int, 472 const char*, 473 FindFunctionFn**, 474 void**); 475 476 // This needs to happen at the end as it depends on the functions 477 // defined above. 478 static constexpr sqlite3_module kModule = CreateModule(); 479 }; 480 481 } // namespace perfetto::trace_processor 482 483 #endif // SRC_TRACE_PROCESSOR_PERFETTO_SQL_INTRINSICS_OPERATORS_SPAN_JOIN_OPERATOR_H_ 484