• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2024 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "src/trace_processor/perfetto_sql/intrinsics/functions/interval_intersect.h"
18 
19 #include <algorithm>
20 #include <cinttypes>
21 #include <cstdint>
22 #include <iterator>
23 #include <memory>
24 #include <numeric>
25 #include <string>
26 #include <string_view>
27 #include <utility>
28 #include <variant>
29 #include <vector>
30 
31 #include "perfetto/base/compiler.h"
32 #include "perfetto/base/logging.h"
33 #include "perfetto/base/status.h"
34 #include "perfetto/ext/base/flat_hash_map.h"
35 #include "perfetto/ext/base/status_or.h"
36 #include "perfetto/ext/base/string_utils.h"
37 #include "perfetto/trace_processor/basic_types.h"
38 #include "src/trace_processor/containers/interval_intersector.h"
39 #include "src/trace_processor/containers/string_pool.h"
40 #include "src/trace_processor/db/runtime_table.h"
41 #include "src/trace_processor/perfetto_sql/engine/perfetto_sql_engine.h"
42 #include "src/trace_processor/perfetto_sql/intrinsics/types/partitioned_intervals.h"
43 #include "src/trace_processor/perfetto_sql/parser/function_util.h"
44 #include "src/trace_processor/sqlite/bindings/sqlite_bind.h"
45 #include "src/trace_processor/sqlite/bindings/sqlite_column.h"
46 #include "src/trace_processor/sqlite/bindings/sqlite_function.h"
47 #include "src/trace_processor/sqlite/bindings/sqlite_result.h"
48 #include "src/trace_processor/sqlite/bindings/sqlite_stmt.h"
49 #include "src/trace_processor/sqlite/bindings/sqlite_type.h"
50 #include "src/trace_processor/sqlite/bindings/sqlite_value.h"
51 #include "src/trace_processor/sqlite/sqlite_utils.h"
52 #include "src/trace_processor/util/status_macros.h"
53 
54 namespace perfetto::trace_processor::perfetto_sql {
55 namespace {
56 
57 static const uint32_t kArgCols = 2;
58 static const uint32_t kIdCols = 5;
59 static const uint32_t kPartitionColsOffset = kArgCols + kIdCols;
60 
61 using Intervals = std::vector<Interval>;
62 using BuilderColType = RuntimeTable::BuilderColumnType;
63 
64 struct MultiIndexInterval {
65   uint64_t start;
66   uint64_t end;
67   std::vector<int64_t> idx_in_table;
68 };
69 
FromSqlValueTypeToBuilderType(SqlValue::Type type)70 BuilderColType FromSqlValueTypeToBuilderType(SqlValue::Type type) {
71   switch (type) {
72     case SqlValue::kLong:
73       return RuntimeTable::kNullInt;
74     case SqlValue::kDouble:
75       return RuntimeTable::kNullDouble;
76     case SqlValue::kString:
77       return RuntimeTable::kString;
78     case SqlValue::kNull:
79     case SqlValue::kBytes:
80       PERFETTO_FATAL("Wrong type");
81   }
82   PERFETTO_FATAL("For gcc");
83 }
84 
85 // Translates partitions to RuntimeTable::Builder types.
GetPartitionsSqlType(const Partitions & partitions)86 base::StatusOr<std::vector<BuilderColType>> GetPartitionsSqlType(
87     const Partitions& partitions) {
88   auto partition_it = partitions.GetIterator();
89   if (!partition_it) {
90     return std::vector<BuilderColType>();
91   }
92   uint32_t p_count =
93       static_cast<uint32_t>(partition_it.value().sql_values.size());
94 
95   std::vector<BuilderColType> types(p_count, BuilderColType::kNull);
96   bool any_part_not_found = true;
97   // We expect this loop to be broken very early, but it has to be implemented
98   // as loop as we can't deduce the type partition with NULL value.
99   for (; partition_it; ++partition_it) {
100     any_part_not_found = false;
101     for (uint32_t i = 0; i < p_count && any_part_not_found; i++) {
102       auto type = types[i];
103       if (type != BuilderColType::kNull) {
104         continue;
105       }
106       if (partition_it.value().sql_values[i].is_null()) {
107         any_part_not_found = true;
108         continue;
109       }
110       types[i] = FromSqlValueTypeToBuilderType(
111           partition_it.value().sql_values[i].type);
112     }
113   }
114   if (any_part_not_found) {
115     return base::ErrStatus(
116         "INTERVAL_INTERSECT: Can't partition on column that only has NULLs");
117   }
118   return types;
119 }
120 
121 // Pushes partition into the result table. Returns the number of rows pushed.
122 // All operations in this function are done on sets of intervals from each
123 // table that correspond to the same partition.
PushPartition(RuntimeTable::Builder & builder,const std::vector<Partition * > & intervals_in_table)124 static base::StatusOr<uint32_t> PushPartition(
125     RuntimeTable::Builder& builder,
126     const std::vector<Partition*>& intervals_in_table) {
127   size_t tables_count = intervals_in_table.size();
128 
129   // Sort `tables_order` from the smallest to the biggest.
130   std::vector<uint32_t> tables_order(tables_count);
131   std::iota(tables_order.begin(), tables_order.end(), 0);
132   std::sort(tables_order.begin(), tables_order.end(),
133             [intervals_in_table](const uint32_t idx_a, const uint32_t idx_b) {
134               return intervals_in_table[idx_a]->intervals.size() <
135                      intervals_in_table[idx_b]->intervals.size();
136             });
137   uint32_t idx_of_smallest_part = tables_order.front();
138   PERFETTO_DCHECK(!intervals_in_table[idx_of_smallest_part]->intervals.empty());
139 
140   // Trivially translate intervals table with the smallest partition to
141   // `MultiIndexIntervals`.
142   std::vector<MultiIndexInterval> last_results;
143   last_results.reserve(intervals_in_table.back()->intervals.size());
144   for (const auto& interval :
145        intervals_in_table[idx_of_smallest_part]->intervals) {
146     MultiIndexInterval m_int;
147     m_int.start = interval.start;
148     m_int.end = interval.end;
149     m_int.idx_in_table.resize(tables_count);
150     m_int.idx_in_table[idx_of_smallest_part] = interval.id;
151     last_results.push_back(m_int);
152   }
153 
154   // Create an interval tree on all tables except the smallest - the first one.
155   std::vector<MultiIndexInterval> overlaps_with_this_table;
156   overlaps_with_this_table.reserve(intervals_in_table.back()->intervals.size());
157   for (uint32_t i = 1; i < tables_count && !last_results.empty(); i++) {
158     overlaps_with_this_table.clear();
159     uint32_t table_idx = tables_order[i];
160 
161     IntervalIntersector::Mode mode = IntervalIntersector::DecideMode(
162         intervals_in_table[table_idx]->is_nonoverlapping,
163         static_cast<uint32_t>(last_results.size()));
164     IntervalIntersector cur_intersector(
165         intervals_in_table[table_idx]->intervals, mode);
166     for (const auto& prev_result : last_results) {
167       Intervals new_overlaps;
168       cur_intersector.FindOverlaps(prev_result.start, prev_result.end,
169                                    new_overlaps);
170       for (const auto& overlap : new_overlaps) {
171         MultiIndexInterval m_int;
172         m_int.idx_in_table = std::move(prev_result.idx_in_table);
173         m_int.idx_in_table[table_idx] = overlap.id;
174         m_int.start = overlap.start;
175         m_int.end = overlap.end;
176         overlaps_with_this_table.push_back(std::move(m_int));
177       }
178     }
179 
180     last_results = std::move(overlaps_with_this_table);
181   }
182 
183   uint32_t rows_count = static_cast<uint32_t>(last_results.size());
184   std::vector<int64_t> timestamps(rows_count);
185   std::vector<int64_t> durations(rows_count);
186   std::vector<std::vector<int64_t>> ids(tables_count);
187   for (auto& t_ids_vec : ids) {
188     t_ids_vec.resize(rows_count);
189   }
190 
191   for (uint32_t i = 0; i < rows_count; i++) {
192     const MultiIndexInterval& interval = last_results[i];
193     timestamps[i] = static_cast<int64_t>(interval.start);
194     durations[i] = static_cast<int64_t>(interval.end) -
195                    static_cast<int64_t>(interval.start);
196     for (uint32_t j = 0; j < tables_count; j++) {
197       ids[j][i] = interval.idx_in_table[j];
198     }
199   }
200 
201   builder.AddNonNullIntegersUnchecked(0, std::move(timestamps));
202   builder.AddNonNullIntegersUnchecked(1, std::move(durations));
203   for (uint32_t i = 0; i < tables_count; i++) {
204     builder.AddNonNullIntegersUnchecked(i + kArgCols, ids[i]);
205   }
206 
207   for (uint32_t i = 0; i < intervals_in_table[0]->sql_values.size(); i++) {
208     const SqlValue& part_val = intervals_in_table[0]->sql_values[i];
209     switch (part_val.type) {
210       case SqlValue::kLong:
211         RETURN_IF_ERROR(builder.AddIntegers(i + kPartitionColsOffset,
212                                             part_val.AsLong(), rows_count));
213         continue;
214       case SqlValue::kDouble:
215         RETURN_IF_ERROR(builder.AddFloats(i + kPartitionColsOffset,
216                                           part_val.AsDouble(), rows_count));
217         continue;
218       case SqlValue::kString:
219         RETURN_IF_ERROR(builder.AddTexts(i + kPartitionColsOffset,
220                                          part_val.AsString(), rows_count));
221         continue;
222       case SqlValue::kNull:
223         RETURN_IF_ERROR(builder.AddNulls(i + kPartitionColsOffset, rows_count));
224         continue;
225       case SqlValue::kBytes:
226         PERFETTO_FATAL("Invalid partition type");
227     }
228   }
229 
230   return static_cast<uint32_t>(last_results.size());
231 }
232 
233 struct IntervalIntersect : public SqliteFunction<IntervalIntersect> {
234   static constexpr char kName[] = "__intrinsic_interval_intersect";
235   // Two tables that are being intersected.
236   // TODO(mayzner): Support more tables.
237   static constexpr int kArgCount = -1;
238 
239   struct UserDataContext {
240     PerfettoSqlEngine* engine;
241     StringPool* pool;
242   };
243 
Stepperfetto::trace_processor::perfetto_sql::__anon57f6bac00111::IntervalIntersect244   static void Step(sqlite3_context* ctx, int argc, sqlite3_value** argv) {
245     PERFETTO_DCHECK(argc >= 2);
246     size_t tabc = static_cast<size_t>(argc - 1);
247     if (tabc > kIdCols) {
248       return sqlite::result::Error(
249           ctx, "interval intersect: Can intersect at most 5 tables");
250     }
251     const char* partition_list = sqlite::value::Text(argv[argc - 1]);
252     if (!partition_list) {
253       return sqlite::result::Error(
254           ctx, "interval intersect: column list cannot be null");
255     }
256 
257     // Get column names of return columns.
258     std::vector<std::string> ret_col_names{"ts", "dur"};
259     for (uint32_t i = 0; i < kIdCols; i++) {
260       ret_col_names.push_back(base::StackString<32>("id_%u", i).ToStdString());
261     }
262     std::vector<std::string> partition_columns =
263         base::SplitString(base::StripChars(partition_list, "()", ' '), ",");
264     if (partition_columns.size() > 4) {
265       return sqlite::result::Error(
266           ctx, "interval intersect: Can take at most 4 partitions.");
267     }
268     for (const auto& c : partition_columns) {
269       std::string p_col_name = base::TrimWhitespace(c).c_str();
270       if (!p_col_name.empty()) {
271         ret_col_names.push_back(p_col_name);
272       }
273     }
274 
275     // Get data from of each table.
276     std::vector<PartitionedTable*> tables(tabc);
277     std::vector<Partitions*> t_partitions(tabc);
278 
279     for (uint32_t i = 0; i < tabc; i++) {
280       tables[i] = sqlite::value::Pointer<PartitionedTable>(
281           argv[i], PartitionedTable::kName);
282 
283       // If any of the tables is empty the intersection with it also has to be
284       // empty.
285       if (!tables[i] || tables[i]->partitions_map.size() == 0) {
286         SQLITE_ASSIGN_OR_RETURN(
287             ctx, std::unique_ptr<RuntimeTable> ret_table,
288             RuntimeTable::Builder(GetUserData(ctx)->pool, ret_col_names)
289                 .Build(0));
290         return sqlite::result::UniquePointer(ctx, std::move(ret_table),
291                                              "TABLE");
292       }
293       t_partitions[i] = &tables[i]->partitions_map;
294     }
295 
296     std::vector<BuilderColType> col_types(kArgCols + tabc,
297                                           BuilderColType::kInt);
298     // Add dummy id cols.
299     col_types.resize(kArgCols + kIdCols, BuilderColType::kNullInt);
300 
301     Partitions* p_values = &tables[0]->partitions_map;
302     SQLITE_ASSIGN_OR_RETURN(ctx, std::vector<BuilderColType> p_types,
303                             GetPartitionsSqlType(*p_values));
304     col_types.insert(col_types.end(), p_types.begin(), p_types.end());
305 
306     RuntimeTable::Builder builder(GetUserData(ctx)->pool, ret_col_names,
307                                   col_types);
308 
309     // Partitions will be taken from the table which has the least number of
310     // them.
311     auto min_el = std::min_element(t_partitions.begin(), t_partitions.end(),
312                                    [](const auto& t_a, const auto& t_b) {
313                                      return t_a->size() < t_b->size();
314                                    });
315 
316     auto t_least_partitions =
317         static_cast<uint32_t>(std::distance(t_partitions.begin(), min_el));
318 
319     // The only partitions we should look at are partitions from the table
320     // with the least partitions.
321     const Partitions* p_intervals = t_partitions[t_least_partitions];
322 
323     // For each partition insert into table.
324     uint32_t rows = 0;
325     for (auto p_it = p_intervals->GetIterator(); p_it; ++p_it) {
326       std::vector<Partition*> cur_partition_in_table;
327       bool all_have_p = true;
328 
329       // From each table get all vectors of intervals.
330       for (uint32_t i = 0; i < tabc; i++) {
331         Partitions* t = t_partitions[i];
332         if (auto found = t->Find(p_it.key())) {
333           cur_partition_in_table.push_back(found);
334         } else {
335           all_have_p = false;
336           break;
337         }
338       }
339 
340       // Only push into the table if all tables have this partition present.
341       if (all_have_p) {
342         SQLITE_ASSIGN_OR_RETURN(ctx, uint32_t pushed_rows,
343                                 PushPartition(builder, cur_partition_in_table));
344         rows += pushed_rows;
345       }
346     }
347 
348     // Fill the dummy id columns with nulls.
349     for (uint32_t i = static_cast<uint32_t>(tabc); i < kIdCols; i++) {
350       SQLITE_RETURN_IF_ERROR(ctx, builder.AddNulls(i + kArgCols, rows));
351     }
352 
353     SQLITE_ASSIGN_OR_RETURN(ctx, std::unique_ptr<RuntimeTable> ret_tab,
354                             std::move(builder).Build(rows));
355 
356     return sqlite::result::UniquePointer(ctx, std::move(ret_tab), "TABLE");
357   }
358 };
359 
360 }  // namespace
361 
RegisterIntervalIntersectFunctions(PerfettoSqlEngine & engine,StringPool * pool)362 base::Status RegisterIntervalIntersectFunctions(PerfettoSqlEngine& engine,
363                                                 StringPool* pool) {
364   return engine.RegisterSqliteFunction<IntervalIntersect>(
365       std::make_unique<IntervalIntersect::UserDataContext>(
366           IntervalIntersect::UserDataContext{&engine, pool}));
367 }
368 
369 }  // namespace perfetto::trace_processor::perfetto_sql
370