1 /*
2 * Copyright (C) 2022 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "src/trace_processor/db/view.h"
18
19 #include <stddef.h>
20 #include <stdint.h>
21 #include <algorithm>
22 #include <iterator>
23 #include <limits>
24 #include <map>
25 #include <memory>
26 #include <string>
27 #include <vector>
28
29 #include "perfetto/base/compiler.h"
30 #include "perfetto/base/flat_set.h"
31 #include "perfetto/ext/base/flat_hash_map.h"
32 #include "perfetto/ext/base/string_view.h"
33 #include "perfetto/trace_processor/basic_types.h"
34 #include "src/trace_processor/containers/row_map.h"
35 #include "src/trace_processor/db/column.h"
36 #include "src/trace_processor/db/table.h"
37 #include "src/trace_processor/db/typed_column.h"
38
39 namespace perfetto {
40 namespace trace_processor {
41
42 View::View() = default;
43
View(std::unique_ptr<TableNode> root,std::vector<SourceColumn> source_col_by_output_idx,Table::Schema schema)44 View::View(std::unique_ptr<TableNode> root,
45 std::vector<SourceColumn> source_col_by_output_idx,
46 Table::Schema schema)
47 : root_node_(std::move(root)),
48 source_col_by_output_idx_(std::move(source_col_by_output_idx)),
49 schema_(std::move(schema)) {}
50
51 View::~View() = default;
52
Create(Table * root_table,const char * root_table_name,std::initializer_list<JoinTable> joins,std::initializer_list<OutputColumn> cols,View * view)53 base::Status View::Create(Table* root_table,
54 const char* root_table_name,
55 std::initializer_list<JoinTable> joins,
56 std::initializer_list<OutputColumn> cols,
57 View* view) {
58 // Insert the node for the root table; the column indices being std::nullopt
59 // indicates this is the root.
60 std::unique_ptr<TableNode> root_node(
61 new TableNode{root_table, std::nullopt, std::nullopt, JoinFlag::kNoFlag,
62 TableNode::Children{}});
63 base::FlatHashMap<base::StringView, TableNode*> node_map;
64 node_map.Insert(root_table_name, root_node.get());
65
66 // Verify that all the joins are well-formed and build the join-tree
67 // structure.
68 for (const JoinTable& join : joins) {
69 // Verify that the previous table was previously defined (either by
70 // the root or prior join).
71 TableNode** prev_node_it = node_map.Find(join.prev_table_name);
72 if (!prev_node_it) {
73 return base::ErrStatus(
74 "View has table %s joining with table %s which was not "
75 "previously defined",
76 join.table_name, join.prev_table_name);
77 }
78 TableNode* prev_node = *prev_node_it;
79
80 // Verify that the previous table's column exists.
81 std::optional<uint32_t> opt_prev_col_idx =
82 prev_node->table->GetColumnIndexByName(join.prev_col);
83 if (!opt_prev_col_idx) {
84 return base::ErrStatus(
85 "View references column %s in table %s which does not exist",
86 join.prev_col, join.prev_table_name);
87 }
88
89 // Verify that the current table's column exists.
90 std::optional<uint32_t> opt_col_idx =
91 join.table->GetColumnIndexByName(join.col);
92 if (!opt_col_idx) {
93 return base::ErrStatus(
94 "View references column %s in table %s which does not exist",
95 join.col, join.table_name);
96 }
97
98 // TODO(lalitm): add some extra checks about the columns being joined here
99 // (i.e. right column being an id, left column being non-nullable, neither
100 // column being a dummy column etc, neither column is hidden etc.).
101
102 // Build the node and insert it into the map.
103 prev_node->children.emplace_back(
104 new TableNode{join.table, *opt_col_idx, *opt_prev_col_idx,
105 join.join_flags, TableNode::Children{}});
106
107 auto it_and_inserted =
108 node_map.Insert(join.table_name, prev_node->children.back().get());
109 if (!it_and_inserted.second) {
110 return base::ErrStatus("View has duplicate table name %s",
111 join.table_name);
112 }
113 }
114
115 // Verify that there all the output columns are well formed.
116 {
117 base::FlatSet<base::StringView> col_names;
118 for (const OutputColumn& col : cols) {
119 auto it_and_inserted = col_names.insert(col.col_name);
120 if (!it_and_inserted.second) {
121 return base::ErrStatus("View has duplicate column %s", col.col_name);
122 }
123
124 TableNode** node_it = node_map.Find(col.source_table_name);
125 if (!node_it) {
126 return base::ErrStatus(
127 "View references table %s as source for column %s which does "
128 "not exist",
129 col.source_table_name, col.col_name);
130 }
131
132 const TableNode* node = *node_it;
133 if (!node->table->GetColumnIndexByName(col.source_col_name)) {
134 return base::ErrStatus(
135 "View references column %s in table %s as source for column %s "
136 "which does not exist",
137 col.source_col_name, col.source_table_name, col.col_name);
138 }
139 }
140 }
141
142 // Build the schema of the output table and a mapping from each output column
143 // to the source column which generates it.
144 std::vector<SourceColumn> source_col_by_output_idx(cols.size());
145 Table::Schema schema;
146 for (const OutputColumn& col : cols) {
147 const TableNode* node = *node_map.Find(col.source_table_name);
148 uint32_t table_col_idx =
149 *node->table->GetColumnIndexByName(col.source_col_name);
150
151 const Column& table_col = node->table->GetColumn(table_col_idx);
152 PERFETTO_DCHECK(!table_col.IsHidden());
153
154 // TODO(lalitm): if the view specifies the right hand side table as
155 // the source for a joined column, we should be able to use the left hand
156 // side instead. Add this as a future optimization or detect it and
157 // error out.
158
159 base::StringView source_table_name(col.source_table_name);
160 schema.columns.emplace_back(Table::Schema::Column{
161 col.col_name, table_col.type(),
162 source_table_name == root_table_name ? table_col.IsId() : false,
163 source_table_name == root_table_name ? table_col.IsSorted() : false,
164 table_col.IsHidden(),
165 source_table_name == root_table_name ? table_col.IsSetId() : false});
166
167 uint32_t output_idx = static_cast<uint32_t>(schema.columns.size() - 1);
168 source_col_by_output_idx[output_idx] = {node, table_col_idx};
169 }
170
171 *view = View(std::move(root_node), std::move(source_col_by_output_idx),
172 std::move(schema));
173 return base::OkStatus();
174 }
175
View(Table * root_table,const char * root_table_name,std::initializer_list<JoinTable> joins,std::initializer_list<OutputColumn> columns)176 View::View(Table* root_table,
177 const char* root_table_name,
178 std::initializer_list<JoinTable> joins,
179 std::initializer_list<OutputColumn> columns) {
180 base::Status status = Create(root_table, root_table_name, std::move(joins),
181 std::move(columns), this);
182 if (!status.ok()) {
183 PERFETTO_FATAL("Failed building view: %s", status.c_message());
184 }
185 }
186
Query(const std::vector<Constraint> & cs,const std::vector<Order> & ob,const BitVector & cols_used) const187 Table View::Query(const std::vector<Constraint>& cs,
188 const std::vector<Order>& ob,
189 const BitVector& cols_used) const {
190 PERFETTO_DCHECK(cols_used.size() == schema_.columns.size());
191
192 TableNode* root = root_node_.get();
193
194 // Below is the core algorithm which does joining and querying simultaneously.
195 // We do this to allow optimizations on which way to order the join and
196 // filter based on the join type, constraints, row counters etc.
197 //
198 // The algorithm is implemented by the |QueryHelper| class for the purposes
199 // of sharing a bunch of temporary state between the different stages of the
200 // algorithm.
201
202 // The constructor for query helper builds all the temporary state:
203 // essentially a copy of the join tree with metadata about which tables are
204 // used, which tables remove rows from parents and generates the initial
205 // output tables and RowMaps.
206 QueryHelper helper(root, source_col_by_output_idx_, cs, cols_used);
207
208 // |FilterAndJoinRecursive| is responsible for filtering all relevant tables
209 // which have a constraint necessary for them, materializing any tables
210 // participating in the join and computing the "child" table and "parent"
211 // RowMap.
212 //
213 // It does *not* propogate the RowMap downwards: this is done by
214 // |ApplyRowMapRecursive|. We don't do this because it would be very
215 // inefficient to constantly propogate the RowMap at every level in the middle
216 // of a DFS (at its heart, this function is a post-order DFS).
217 helper.FilterAndJoinRecursive(root);
218
219 // |ApplyRowMapRecursive| is responsible for recursively propogating the
220 // join RowMaps downwards. This is necessary because if you have
221 //
222 // A JOIN B JOIN C
223 //
224 // |FilterAndJoinRecursive| will compute the final state of A but only
225 // intermediate states for B and C: for B, it will filter out all rows which
226 // don't exist in C and for C it will simply leave as-is. The fact that
227 // every row in A now has a corresponding row in B and similarily with C
228 // is the job of this function.
229 //
230 // ApplyRowMapRecursive then pushes down the RowMap representing the join A
231 // and B and applies that to B. Finally, it selects the B-C RowMap with the
232 // A-B RowMap and applies this to C's table.
233 helper.ApplyRowMapRecursive(root);
234
235 // |BuildTable| converts the intermediate tables from the above and generates
236 // a cohesive table matching the schema of this view. Any "not used" columns
237 // are simply replaced with dummy columns who cannot be queried which saves
238 // the cost of doing unnecessary joins.
239 Table filtered =
240 helper.BuildTable(root, schema_, source_col_by_output_idx_, cols_used);
241
242 // The final step is simply to sort the table resulting from filtering.
243 //
244 // TODO(lalitm): we could be more efficient about this and sort the source
245 // tables *before* we join. However, given sorts are relatively rare, we don't
246 // do this yet.
247 return filtered.Sort(ob);
248 }
249
QueryHelper(TableNode * root_node,const std::vector<SourceColumn> & source_col_by_output_idx,const std::vector<Constraint> & cs,const BitVector & cols_used)250 View::QueryHelper::QueryHelper(
251 TableNode* root_node,
252 const std::vector<SourceColumn>& source_col_by_output_idx,
253 const std::vector<Constraint>& cs,
254 const BitVector& cols_used)
255 : state_(BuildNodeStateMap(root_node,
256 source_col_by_output_idx,
257 cs,
258 cols_used)) {}
259
FilterAndJoinRecursive(TableNode * node)260 void View::QueryHelper::FilterAndJoinRecursive(TableNode* node) {
261 NodeState& state = *state_.Find(node);
262
263 // TODO(lalitm): instead of computing the left table straight away here, we
264 // could more intelligently figure out whether doing the join first is more
265 // efficient.
266 state.output = state.output.Filter(state.cs);
267
268 const Table& left_table = state.output;
269 RowMap left_rm(0, left_table.row_count());
270 for (const auto& child : node->children) {
271 NodeState* child_state = state_.Find(child.get());
272
273 // If we have no rows, just bail out to minimize work done.
274 if (left_rm.empty())
275 break;
276
277 // If the table is not used and doesn't remove any rows in the parent, we
278 // can just rely on the default RowMap.
279 if (!child_state->is_used && !child_state->removes_parent_rows)
280 break;
281
282 // Recurse on the child table so we now the contents of the right table
283 // before we filter any further.
284 FilterAndJoinRecursive(child.get());
285
286 // If the right table is empty, the left table cannot possibly join
287 // without removing rows.
288 const Table& right_table = child_state->output;
289 if (right_table.row_count() == 0) {
290 left_rm = RowMap();
291 break;
292 }
293
294 const auto& left_col = *TypedColumn<BaseId>::FromColumn(
295 &state.output.GetColumn(*child->parent_join_col_idx));
296 const auto& right_col = *IdColumn<BaseId>::FromColumn(
297 &child_state->output.GetColumn(*child->join_col_idx));
298
299 // The core join loop. This function iterates through every row in
300 // the left table and figures out whether to keep it if the row
301 // also exists in the right table. While doing this, it also figures
302 // out the row number in the right table for every row in the left table.
303 std::vector<uint32_t> right_rm_iv;
304 right_rm_iv.reserve(left_rm.size());
305 left_col.overlay().FilterInto(&left_rm, [&](uint32_t idx) {
306 // Check if the right table has the value from the left table.
307 std::optional<uint32_t> opt_idx =
308 right_col.IndexOf(left_col.GetAtIdx(idx));
309
310 // If it doesn't, return false indicating that this row should be
311 // removed from the left table.
312 if (!opt_idx)
313 return false;
314
315 // If the row does exist, then keep track of the index of the row
316 // for applying to the right table and return true to also keep this
317 // row in the left table.
318 right_rm_iv.emplace_back(*opt_idx);
319 return true;
320 });
321 child_state->parent_join_rm = RowMap(std::move(right_rm_iv));
322 }
323 state.output = state.output.Apply(std::move(left_rm));
324 }
325
ApplyRowMapRecursive(TableNode * node,RowMap rm)326 void View::QueryHelper::ApplyRowMapRecursive(TableNode* node, RowMap rm) {
327 NodeState& state = *state_.Find(node);
328 for (const auto& child : node->children) {
329 const NodeState& child_state = *state_.Find(child.get());
330 // If the child table is not used, then we don't need to recurse any
331 // further.
332 if (!child_state.is_used)
333 break;
334 ApplyRowMapRecursive(child.get(),
335 child_state.parent_join_rm.SelectRows(rm));
336 }
337 state.output = state.output.Apply(std::move(rm));
338 }
339
BuildNodeStateMap(TableNode * root_node,const std::vector<SourceColumn> & source_col_by_output_idx,const std::vector<Constraint> & cs,const BitVector & cols_used)340 View::QueryHelper::NodeStateMap View::QueryHelper::BuildNodeStateMap(
341 TableNode* root_node,
342 const std::vector<SourceColumn>& source_col_by_output_idx,
343 const std::vector<Constraint>& cs,
344 const BitVector& cols_used) {
345 // Populate the map contains all the nodes in the tree.
346 base::FlatHashMap<const TableNode*, QueryHelper::NodeState> node_state;
347 PostOrderDfs(root_node, [&node_state](TableNode* node) {
348 node_state.Insert(node,
349 QueryHelper::NodeState{
350 {}, false, false, node->table->Copy(), RowMap()});
351 });
352
353 // For each constraint, add the translated constraint to the relevant table's
354 // constraint set.
355 for (const Constraint& c : cs) {
356 const auto& source_col = source_col_by_output_idx[c.col_idx];
357 auto& metadata = *node_state.Find(source_col.first);
358 metadata.cs.emplace_back(Constraint{source_col.second, c.op, c.value});
359 }
360
361 // For each used column, mark the associated table as being used.
362 for (auto it = cols_used.IterateSetBits(); it; it.Next()) {
363 const auto& source = source_col_by_output_idx[it.index()];
364 node_state.Find(source.first)->is_used = true;
365 }
366
367 // For each node, figure out whether ti will cause parent rows
368 // to be removed.
369 for (auto it = node_state.GetIterator(); it; ++it) {
370 // The below logic doesn't make sense on the root node.
371 if (it.key() == root_node)
372 continue;
373
374 // A join will retain (i.e. *not* remove parent rows) if one of the
375 // following is true:
376 // a) the child (right-side of join) table contains every id
377 // which could exist in the parent (left-side) table.
378 // TODO(lalitm): add more conditions here.
379 bool join_retains_parent_rows =
380 (it.key()->join_flags & JoinFlag::kIdAlwaysPresent) != 0;
381
382 // However, if this table has constraints, then we could always remove the
383 // parents rows even if the join would normally retain all rows.
384 it.value().removes_parent_rows =
385 !it.value().cs.empty() || !join_retains_parent_rows;
386 }
387
388 // Do a DFS on the node tree and propogate up the is_used and
389 // removes_parent_rows boolean. In other words, if a table is used by SQLite,
390 // every ancestor must also be used as we need to join with every table on the
391 // path between the root and used table. Similarily, if a table removes parent
392 // rows, then it does this recursively upwards.
393 PostOrderDfs(root_node, [&node_state](TableNode* node) {
394 NodeState& state = *node_state.Find(node);
395 for (const auto& child : node->children) {
396 const NodeState& child_metadata = *node_state.Find(child.get());
397 state.is_used |= child_metadata.is_used;
398 state.removes_parent_rows |= child_metadata.removes_parent_rows;
399 }
400 });
401
402 return node_state;
403 }
404
BuildTable(TableNode * root,const Table::Schema & schema,const std::vector<SourceColumn> & source_col_by_output_idx,const BitVector & cols_used)405 Table View::QueryHelper::BuildTable(
406 TableNode* root,
407 const Table::Schema& schema,
408 const std::vector<SourceColumn>& source_col_by_output_idx,
409 const BitVector& cols_used) {
410 NodeState& root_state = *state_.Find(root);
411
412 Table output(root->table->string_pool_);
413 output.row_count_ = root_state.output.row_count();
414 output.overlays_.emplace_back(ColumnStorageOverlay(output.row_count_));
415
416 std::map<std::pair<const TableNode*, uint32_t>, uint32_t> cached_rm;
417 for (auto it = cols_used.IterateAllBits(); it; it.Next()) {
418 const char* col_name = schema.columns[it.index()].name.c_str();
419 if (!it.IsSet()) {
420 output.columns_.emplace_back(
421 Column::DummyColumn(col_name, &output, it.index()));
422 continue;
423 }
424
425 const auto& source_col = source_col_by_output_idx[it.index()];
426
427 Table& node_table = state_.Find(source_col.first)->output;
428 const Column& table_col = node_table.GetColumn(source_col.second);
429
430 auto it_and_inserted = cached_rm.emplace(
431 std::make_pair(source_col.first, table_col.overlay_index()),
432 static_cast<uint32_t>(output.overlays_.size()));
433 if (it_and_inserted.second) {
434 output.overlays_.emplace_back(
435 std::move(node_table.overlays_[table_col.overlay_index()]));
436 }
437
438 uint32_t rm_idx = it_and_inserted.first->second;
439 output.columns_.emplace_back(
440 Column(table_col, &output, it.index(), rm_idx, col_name));
441 }
442 return output;
443 }
444
GetColumnCount() const445 uint32_t View::GetColumnCount() const {
446 return static_cast<uint32_t>(schema_.columns.size());
447 }
448
EstimateRowCount() const449 uint32_t View::EstimateRowCount() const {
450 uint32_t count = 0;
451 PostOrderDfs(root_node_.get(), [&count](TableNode* node) {
452 count = std::max(node->table->row_count(), count);
453 });
454 return count;
455 }
456
457 } // namespace trace_processor
458 } // namespace perfetto
459