1 /*
2 * Copyright (C) 2024 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "src/trace_processor/perfetto_sql/intrinsics/functions/interval_intersect.h"
18
19 #include <algorithm>
20 #include <cinttypes>
21 #include <cstdint>
22 #include <iterator>
23 #include <memory>
24 #include <numeric>
25 #include <string>
26 #include <string_view>
27 #include <utility>
28 #include <variant>
29 #include <vector>
30
31 #include "perfetto/base/compiler.h"
32 #include "perfetto/base/logging.h"
33 #include "perfetto/base/status.h"
34 #include "perfetto/ext/base/flat_hash_map.h"
35 #include "perfetto/ext/base/status_or.h"
36 #include "perfetto/ext/base/string_utils.h"
37 #include "perfetto/trace_processor/basic_types.h"
38 #include "src/trace_processor/containers/interval_intersector.h"
39 #include "src/trace_processor/containers/string_pool.h"
40 #include "src/trace_processor/db/runtime_table.h"
41 #include "src/trace_processor/perfetto_sql/engine/perfetto_sql_engine.h"
42 #include "src/trace_processor/perfetto_sql/intrinsics/types/partitioned_intervals.h"
43 #include "src/trace_processor/perfetto_sql/parser/function_util.h"
44 #include "src/trace_processor/sqlite/bindings/sqlite_bind.h"
45 #include "src/trace_processor/sqlite/bindings/sqlite_column.h"
46 #include "src/trace_processor/sqlite/bindings/sqlite_function.h"
47 #include "src/trace_processor/sqlite/bindings/sqlite_result.h"
48 #include "src/trace_processor/sqlite/bindings/sqlite_stmt.h"
49 #include "src/trace_processor/sqlite/bindings/sqlite_type.h"
50 #include "src/trace_processor/sqlite/bindings/sqlite_value.h"
51 #include "src/trace_processor/sqlite/sqlite_utils.h"
52 #include "src/trace_processor/util/status_macros.h"
53
54 namespace perfetto::trace_processor::perfetto_sql {
55 namespace {
56
57 static const uint32_t kArgCols = 2;
58 static const uint32_t kIdCols = 5;
59 static const uint32_t kPartitionColsOffset = kArgCols + kIdCols;
60
61 using Intervals = std::vector<Interval>;
62 using BuilderColType = RuntimeTable::BuilderColumnType;
63
64 struct MultiIndexInterval {
65 uint64_t start;
66 uint64_t end;
67 std::vector<int64_t> idx_in_table;
68 };
69
FromSqlValueTypeToBuilderType(SqlValue::Type type)70 BuilderColType FromSqlValueTypeToBuilderType(SqlValue::Type type) {
71 switch (type) {
72 case SqlValue::kLong:
73 return RuntimeTable::kNullInt;
74 case SqlValue::kDouble:
75 return RuntimeTable::kNullDouble;
76 case SqlValue::kString:
77 return RuntimeTable::kString;
78 case SqlValue::kNull:
79 case SqlValue::kBytes:
80 PERFETTO_FATAL("Wrong type");
81 }
82 PERFETTO_FATAL("For gcc");
83 }
84
85 // Translates partitions to RuntimeTable::Builder types.
GetPartitionsSqlType(const Partitions & partitions)86 base::StatusOr<std::vector<BuilderColType>> GetPartitionsSqlType(
87 const Partitions& partitions) {
88 auto partition_it = partitions.GetIterator();
89 if (!partition_it) {
90 return std::vector<BuilderColType>();
91 }
92 uint32_t p_count =
93 static_cast<uint32_t>(partition_it.value().sql_values.size());
94
95 std::vector<BuilderColType> types(p_count, BuilderColType::kNull);
96 bool any_part_not_found = true;
97 // We expect this loop to be broken very early, but it has to be implemented
98 // as loop as we can't deduce the type partition with NULL value.
99 for (; partition_it; ++partition_it) {
100 any_part_not_found = false;
101 for (uint32_t i = 0; i < p_count && any_part_not_found; i++) {
102 auto type = types[i];
103 if (type != BuilderColType::kNull) {
104 continue;
105 }
106 if (partition_it.value().sql_values[i].is_null()) {
107 any_part_not_found = true;
108 continue;
109 }
110 types[i] = FromSqlValueTypeToBuilderType(
111 partition_it.value().sql_values[i].type);
112 }
113 }
114 if (any_part_not_found) {
115 return base::ErrStatus(
116 "INTERVAL_INTERSECT: Can't partition on column that only has NULLs");
117 }
118 return types;
119 }
120
121 // Pushes partition into the result table. Returns the number of rows pushed.
122 // All operations in this function are done on sets of intervals from each
123 // table that correspond to the same partition.
PushPartition(RuntimeTable::Builder & builder,const std::vector<Partition * > & intervals_in_table)124 static base::StatusOr<uint32_t> PushPartition(
125 RuntimeTable::Builder& builder,
126 const std::vector<Partition*>& intervals_in_table) {
127 size_t tables_count = intervals_in_table.size();
128
129 // Sort `tables_order` from the smallest to the biggest.
130 std::vector<uint32_t> tables_order(tables_count);
131 std::iota(tables_order.begin(), tables_order.end(), 0);
132 std::sort(tables_order.begin(), tables_order.end(),
133 [intervals_in_table](const uint32_t idx_a, const uint32_t idx_b) {
134 return intervals_in_table[idx_a]->intervals.size() <
135 intervals_in_table[idx_b]->intervals.size();
136 });
137 uint32_t idx_of_smallest_part = tables_order.front();
138 PERFETTO_DCHECK(!intervals_in_table[idx_of_smallest_part]->intervals.empty());
139
140 // Trivially translate intervals table with the smallest partition to
141 // `MultiIndexIntervals`.
142 std::vector<MultiIndexInterval> last_results;
143 last_results.reserve(intervals_in_table.back()->intervals.size());
144 for (const auto& interval :
145 intervals_in_table[idx_of_smallest_part]->intervals) {
146 MultiIndexInterval m_int;
147 m_int.start = interval.start;
148 m_int.end = interval.end;
149 m_int.idx_in_table.resize(tables_count);
150 m_int.idx_in_table[idx_of_smallest_part] = interval.id;
151 last_results.push_back(m_int);
152 }
153
154 // Create an interval tree on all tables except the smallest - the first one.
155 std::vector<MultiIndexInterval> overlaps_with_this_table;
156 overlaps_with_this_table.reserve(intervals_in_table.back()->intervals.size());
157 for (uint32_t i = 1; i < tables_count && !last_results.empty(); i++) {
158 overlaps_with_this_table.clear();
159 uint32_t table_idx = tables_order[i];
160
161 IntervalIntersector::Mode mode = IntervalIntersector::DecideMode(
162 intervals_in_table[table_idx]->is_nonoverlapping,
163 static_cast<uint32_t>(last_results.size()));
164 IntervalIntersector cur_intersector(
165 intervals_in_table[table_idx]->intervals, mode);
166 for (const auto& prev_result : last_results) {
167 Intervals new_overlaps;
168 cur_intersector.FindOverlaps(prev_result.start, prev_result.end,
169 new_overlaps);
170 for (const auto& overlap : new_overlaps) {
171 MultiIndexInterval m_int;
172 m_int.idx_in_table = std::move(prev_result.idx_in_table);
173 m_int.idx_in_table[table_idx] = overlap.id;
174 m_int.start = overlap.start;
175 m_int.end = overlap.end;
176 overlaps_with_this_table.push_back(std::move(m_int));
177 }
178 }
179
180 last_results = std::move(overlaps_with_this_table);
181 }
182
183 uint32_t rows_count = static_cast<uint32_t>(last_results.size());
184 std::vector<int64_t> timestamps(rows_count);
185 std::vector<int64_t> durations(rows_count);
186 std::vector<std::vector<int64_t>> ids(tables_count);
187 for (auto& t_ids_vec : ids) {
188 t_ids_vec.resize(rows_count);
189 }
190
191 for (uint32_t i = 0; i < rows_count; i++) {
192 const MultiIndexInterval& interval = last_results[i];
193 timestamps[i] = static_cast<int64_t>(interval.start);
194 durations[i] = static_cast<int64_t>(interval.end) -
195 static_cast<int64_t>(interval.start);
196 for (uint32_t j = 0; j < tables_count; j++) {
197 ids[j][i] = interval.idx_in_table[j];
198 }
199 }
200
201 builder.AddNonNullIntegersUnchecked(0, std::move(timestamps));
202 builder.AddNonNullIntegersUnchecked(1, std::move(durations));
203 for (uint32_t i = 0; i < tables_count; i++) {
204 builder.AddNonNullIntegersUnchecked(i + kArgCols, ids[i]);
205 }
206
207 for (uint32_t i = 0; i < intervals_in_table[0]->sql_values.size(); i++) {
208 const SqlValue& part_val = intervals_in_table[0]->sql_values[i];
209 switch (part_val.type) {
210 case SqlValue::kLong:
211 RETURN_IF_ERROR(builder.AddIntegers(i + kPartitionColsOffset,
212 part_val.AsLong(), rows_count));
213 continue;
214 case SqlValue::kDouble:
215 RETURN_IF_ERROR(builder.AddFloats(i + kPartitionColsOffset,
216 part_val.AsDouble(), rows_count));
217 continue;
218 case SqlValue::kString:
219 RETURN_IF_ERROR(builder.AddTexts(i + kPartitionColsOffset,
220 part_val.AsString(), rows_count));
221 continue;
222 case SqlValue::kNull:
223 RETURN_IF_ERROR(builder.AddNulls(i + kPartitionColsOffset, rows_count));
224 continue;
225 case SqlValue::kBytes:
226 PERFETTO_FATAL("Invalid partition type");
227 }
228 }
229
230 return static_cast<uint32_t>(last_results.size());
231 }
232
233 struct IntervalIntersect : public SqliteFunction<IntervalIntersect> {
234 static constexpr char kName[] = "__intrinsic_interval_intersect";
235 // Two tables that are being intersected.
236 // TODO(mayzner): Support more tables.
237 static constexpr int kArgCount = -1;
238
239 struct UserDataContext {
240 PerfettoSqlEngine* engine;
241 StringPool* pool;
242 };
243
Stepperfetto::trace_processor::perfetto_sql::__anon57f6bac00111::IntervalIntersect244 static void Step(sqlite3_context* ctx, int argc, sqlite3_value** argv) {
245 PERFETTO_DCHECK(argc >= 2);
246 size_t tabc = static_cast<size_t>(argc - 1);
247 if (tabc > kIdCols) {
248 return sqlite::result::Error(
249 ctx, "interval intersect: Can intersect at most 5 tables");
250 }
251 const char* partition_list = sqlite::value::Text(argv[argc - 1]);
252 if (!partition_list) {
253 return sqlite::result::Error(
254 ctx, "interval intersect: column list cannot be null");
255 }
256
257 // Get column names of return columns.
258 std::vector<std::string> ret_col_names{"ts", "dur"};
259 for (uint32_t i = 0; i < kIdCols; i++) {
260 ret_col_names.push_back(base::StackString<32>("id_%u", i).ToStdString());
261 }
262 std::vector<std::string> partition_columns =
263 base::SplitString(base::StripChars(partition_list, "()", ' '), ",");
264 if (partition_columns.size() > 4) {
265 return sqlite::result::Error(
266 ctx, "interval intersect: Can take at most 4 partitions.");
267 }
268 for (const auto& c : partition_columns) {
269 std::string p_col_name = base::TrimWhitespace(c).c_str();
270 if (!p_col_name.empty()) {
271 ret_col_names.push_back(p_col_name);
272 }
273 }
274
275 // Get data from of each table.
276 std::vector<PartitionedTable*> tables(tabc);
277 std::vector<Partitions*> t_partitions(tabc);
278
279 for (uint32_t i = 0; i < tabc; i++) {
280 tables[i] = sqlite::value::Pointer<PartitionedTable>(
281 argv[i], PartitionedTable::kName);
282
283 // If any of the tables is empty the intersection with it also has to be
284 // empty.
285 if (!tables[i] || tables[i]->partitions_map.size() == 0) {
286 SQLITE_ASSIGN_OR_RETURN(
287 ctx, std::unique_ptr<RuntimeTable> ret_table,
288 RuntimeTable::Builder(GetUserData(ctx)->pool, ret_col_names)
289 .Build(0));
290 return sqlite::result::UniquePointer(ctx, std::move(ret_table),
291 "TABLE");
292 }
293 t_partitions[i] = &tables[i]->partitions_map;
294 }
295
296 std::vector<BuilderColType> col_types(kArgCols + tabc,
297 BuilderColType::kInt);
298 // Add dummy id cols.
299 col_types.resize(kArgCols + kIdCols, BuilderColType::kNullInt);
300
301 Partitions* p_values = &tables[0]->partitions_map;
302 SQLITE_ASSIGN_OR_RETURN(ctx, std::vector<BuilderColType> p_types,
303 GetPartitionsSqlType(*p_values));
304 col_types.insert(col_types.end(), p_types.begin(), p_types.end());
305
306 RuntimeTable::Builder builder(GetUserData(ctx)->pool, ret_col_names,
307 col_types);
308
309 // Partitions will be taken from the table which has the least number of
310 // them.
311 auto min_el = std::min_element(t_partitions.begin(), t_partitions.end(),
312 [](const auto& t_a, const auto& t_b) {
313 return t_a->size() < t_b->size();
314 });
315
316 auto t_least_partitions =
317 static_cast<uint32_t>(std::distance(t_partitions.begin(), min_el));
318
319 // The only partitions we should look at are partitions from the table
320 // with the least partitions.
321 const Partitions* p_intervals = t_partitions[t_least_partitions];
322
323 // For each partition insert into table.
324 uint32_t rows = 0;
325 for (auto p_it = p_intervals->GetIterator(); p_it; ++p_it) {
326 std::vector<Partition*> cur_partition_in_table;
327 bool all_have_p = true;
328
329 // From each table get all vectors of intervals.
330 for (uint32_t i = 0; i < tabc; i++) {
331 Partitions* t = t_partitions[i];
332 if (auto found = t->Find(p_it.key())) {
333 cur_partition_in_table.push_back(found);
334 } else {
335 all_have_p = false;
336 break;
337 }
338 }
339
340 // Only push into the table if all tables have this partition present.
341 if (all_have_p) {
342 SQLITE_ASSIGN_OR_RETURN(ctx, uint32_t pushed_rows,
343 PushPartition(builder, cur_partition_in_table));
344 rows += pushed_rows;
345 }
346 }
347
348 // Fill the dummy id columns with nulls.
349 for (uint32_t i = static_cast<uint32_t>(tabc); i < kIdCols; i++) {
350 SQLITE_RETURN_IF_ERROR(ctx, builder.AddNulls(i + kArgCols, rows));
351 }
352
353 SQLITE_ASSIGN_OR_RETURN(ctx, std::unique_ptr<RuntimeTable> ret_tab,
354 std::move(builder).Build(rows));
355
356 return sqlite::result::UniquePointer(ctx, std::move(ret_tab), "TABLE");
357 }
358 };
359
360 } // namespace
361
RegisterIntervalIntersectFunctions(PerfettoSqlEngine & engine,StringPool * pool)362 base::Status RegisterIntervalIntersectFunctions(PerfettoSqlEngine& engine,
363 StringPool* pool) {
364 return engine.RegisterSqliteFunction<IntervalIntersect>(
365 std::make_unique<IntervalIntersect::UserDataContext>(
366 IntervalIntersect::UserDataContext{&engine, pool}));
367 }
368
369 } // namespace perfetto::trace_processor::perfetto_sql
370