• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2022 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "src/trace_processor/db/view.h"
18 
19 #include <stddef.h>
20 #include <stdint.h>
21 #include <algorithm>
22 #include <iterator>
23 #include <limits>
24 #include <map>
25 #include <memory>
26 #include <string>
27 #include <vector>
28 
29 #include "perfetto/base/compiler.h"
30 #include "perfetto/base/flat_set.h"
31 #include "perfetto/ext/base/flat_hash_map.h"
32 #include "perfetto/ext/base/string_view.h"
33 #include "perfetto/trace_processor/basic_types.h"
34 #include "src/trace_processor/containers/row_map.h"
35 #include "src/trace_processor/db/column.h"
36 #include "src/trace_processor/db/table.h"
37 #include "src/trace_processor/db/typed_column.h"
38 
39 namespace perfetto {
40 namespace trace_processor {
41 
42 View::View() = default;
43 
View(std::unique_ptr<TableNode> root,std::vector<SourceColumn> source_col_by_output_idx,Table::Schema schema)44 View::View(std::unique_ptr<TableNode> root,
45            std::vector<SourceColumn> source_col_by_output_idx,
46            Table::Schema schema)
47     : root_node_(std::move(root)),
48       source_col_by_output_idx_(std::move(source_col_by_output_idx)),
49       schema_(std::move(schema)) {}
50 
51 View::~View() = default;
52 
Create(Table * root_table,const char * root_table_name,std::initializer_list<JoinTable> joins,std::initializer_list<OutputColumn> cols,View * view)53 base::Status View::Create(Table* root_table,
54                           const char* root_table_name,
55                           std::initializer_list<JoinTable> joins,
56                           std::initializer_list<OutputColumn> cols,
57                           View* view) {
58   // Insert the node for the root table; the column indices being std::nullopt
59   // indicates this is the root.
60   std::unique_ptr<TableNode> root_node(
61       new TableNode{root_table, std::nullopt, std::nullopt, JoinFlag::kNoFlag,
62                     TableNode::Children{}});
63   base::FlatHashMap<base::StringView, TableNode*> node_map;
64   node_map.Insert(root_table_name, root_node.get());
65 
66   // Verify that all the joins are well-formed and build the join-tree
67   // structure.
68   for (const JoinTable& join : joins) {
69     // Verify that the previous table was previously defined (either by
70     // the root or prior join).
71     TableNode** prev_node_it = node_map.Find(join.prev_table_name);
72     if (!prev_node_it) {
73       return base::ErrStatus(
74           "View has table %s joining with table %s which was not "
75           "previously defined",
76           join.table_name, join.prev_table_name);
77     }
78     TableNode* prev_node = *prev_node_it;
79 
80     // Verify that the previous table's column exists.
81     std::optional<uint32_t> opt_prev_col_idx =
82         prev_node->table->GetColumnIndexByName(join.prev_col);
83     if (!opt_prev_col_idx) {
84       return base::ErrStatus(
85           "View references column %s in table %s which does not exist",
86           join.prev_col, join.prev_table_name);
87     }
88 
89     // Verify that the current table's column exists.
90     std::optional<uint32_t> opt_col_idx =
91         join.table->GetColumnIndexByName(join.col);
92     if (!opt_col_idx) {
93       return base::ErrStatus(
94           "View references column %s in table %s which does not exist",
95           join.col, join.table_name);
96     }
97 
98     // TODO(lalitm): add some extra checks about the columns being joined here
99     // (i.e. right column being an id, left column being non-nullable, neither
100     // column being a dummy column etc, neither column is hidden etc.).
101 
102     // Build the node and insert it into the map.
103     prev_node->children.emplace_back(
104         new TableNode{join.table, *opt_col_idx, *opt_prev_col_idx,
105                       join.join_flags, TableNode::Children{}});
106 
107     auto it_and_inserted =
108         node_map.Insert(join.table_name, prev_node->children.back().get());
109     if (!it_and_inserted.second) {
110       return base::ErrStatus("View has duplicate table name %s",
111                              join.table_name);
112     }
113   }
114 
115   // Verify that there all the output columns are well formed.
116   {
117     base::FlatSet<base::StringView> col_names;
118     for (const OutputColumn& col : cols) {
119       auto it_and_inserted = col_names.insert(col.col_name);
120       if (!it_and_inserted.second) {
121         return base::ErrStatus("View has duplicate column %s", col.col_name);
122       }
123 
124       TableNode** node_it = node_map.Find(col.source_table_name);
125       if (!node_it) {
126         return base::ErrStatus(
127             "View references table %s as source for column %s which does "
128             "not exist",
129             col.source_table_name, col.col_name);
130       }
131 
132       const TableNode* node = *node_it;
133       if (!node->table->GetColumnIndexByName(col.source_col_name)) {
134         return base::ErrStatus(
135             "View references column %s in table %s as source for column %s "
136             "which does not exist",
137             col.source_col_name, col.source_table_name, col.col_name);
138       }
139     }
140   }
141 
142   // Build the schema of the output table and a mapping from each output column
143   // to the source column which generates it.
144   std::vector<SourceColumn> source_col_by_output_idx(cols.size());
145   Table::Schema schema;
146   for (const OutputColumn& col : cols) {
147     const TableNode* node = *node_map.Find(col.source_table_name);
148     uint32_t table_col_idx =
149         *node->table->GetColumnIndexByName(col.source_col_name);
150 
151     const Column& table_col = node->table->GetColumn(table_col_idx);
152     PERFETTO_DCHECK(!table_col.IsHidden());
153 
154     // TODO(lalitm): if the view specifies the right hand side table as
155     // the source for a joined column, we should be able to use the left hand
156     // side instead. Add this as a future optimization or detect it and
157     // error out.
158 
159     base::StringView source_table_name(col.source_table_name);
160     schema.columns.emplace_back(Table::Schema::Column{
161         col.col_name, table_col.type(),
162         source_table_name == root_table_name ? table_col.IsId() : false,
163         source_table_name == root_table_name ? table_col.IsSorted() : false,
164         table_col.IsHidden(),
165         source_table_name == root_table_name ? table_col.IsSetId() : false});
166 
167     uint32_t output_idx = static_cast<uint32_t>(schema.columns.size() - 1);
168     source_col_by_output_idx[output_idx] = {node, table_col_idx};
169   }
170 
171   *view = View(std::move(root_node), std::move(source_col_by_output_idx),
172                std::move(schema));
173   return base::OkStatus();
174 }
175 
View(Table * root_table,const char * root_table_name,std::initializer_list<JoinTable> joins,std::initializer_list<OutputColumn> columns)176 View::View(Table* root_table,
177            const char* root_table_name,
178            std::initializer_list<JoinTable> joins,
179            std::initializer_list<OutputColumn> columns) {
180   base::Status status = Create(root_table, root_table_name, std::move(joins),
181                                std::move(columns), this);
182   if (!status.ok()) {
183     PERFETTO_FATAL("Failed building view: %s", status.c_message());
184   }
185 }
186 
Query(const std::vector<Constraint> & cs,const std::vector<Order> & ob,const BitVector & cols_used) const187 Table View::Query(const std::vector<Constraint>& cs,
188                   const std::vector<Order>& ob,
189                   const BitVector& cols_used) const {
190   PERFETTO_DCHECK(cols_used.size() == schema_.columns.size());
191 
192   TableNode* root = root_node_.get();
193 
194   // Below is the core algorithm which does joining and querying simultaneously.
195   // We do this to allow optimizations on which way to order the join and
196   // filter based on the join type, constraints, row counters etc.
197   //
198   // The algorithm is implemented by the |QueryHelper| class for the purposes
199   // of sharing a bunch of temporary state between the different stages of the
200   // algorithm.
201 
202   // The constructor for query helper builds all the temporary state:
203   // essentially a copy of the join tree with metadata about which tables are
204   // used, which tables remove rows from parents and generates the initial
205   // output tables and RowMaps.
206   QueryHelper helper(root, source_col_by_output_idx_, cs, cols_used);
207 
208   // |FilterAndJoinRecursive| is responsible for filtering all relevant tables
209   // which have a constraint necessary for them, materializing any tables
210   // participating in the join and computing the "child" table and "parent"
211   // RowMap.
212   //
213   // It does *not* propogate the RowMap downwards: this is done by
214   // |ApplyRowMapRecursive|. We don't do this because it would be very
215   // inefficient to constantly propogate the RowMap at every level in the middle
216   // of a DFS (at its heart, this function is a post-order DFS).
217   helper.FilterAndJoinRecursive(root);
218 
219   // |ApplyRowMapRecursive| is responsible for recursively propogating the
220   // join RowMaps downwards. This is necessary because if you have
221   //
222   // A JOIN B JOIN C
223   //
224   // |FilterAndJoinRecursive| will compute the final state of A but only
225   // intermediate states for B and C: for B, it will filter out all rows which
226   // don't exist in C and for C it will simply leave as-is. The fact that
227   // every row in A now has a corresponding row in B and similarily with C
228   // is the job of this function.
229   //
230   // ApplyRowMapRecursive then pushes down the RowMap representing the join A
231   // and B and applies that to B. Finally, it selects the B-C RowMap with the
232   // A-B RowMap and applies this to C's table.
233   helper.ApplyRowMapRecursive(root);
234 
235   // |BuildTable| converts the intermediate tables from the above and generates
236   // a cohesive table matching the schema of this view. Any "not used" columns
237   // are simply replaced with dummy columns who cannot be queried which saves
238   // the cost of doing unnecessary joins.
239   Table filtered =
240       helper.BuildTable(root, schema_, source_col_by_output_idx_, cols_used);
241 
242   // The final step is simply to sort the table resulting from filtering.
243   //
244   // TODO(lalitm): we could be more efficient about this and sort the source
245   // tables *before* we join. However, given sorts are relatively rare, we don't
246   // do this yet.
247   return filtered.Sort(ob);
248 }
249 
QueryHelper(TableNode * root_node,const std::vector<SourceColumn> & source_col_by_output_idx,const std::vector<Constraint> & cs,const BitVector & cols_used)250 View::QueryHelper::QueryHelper(
251     TableNode* root_node,
252     const std::vector<SourceColumn>& source_col_by_output_idx,
253     const std::vector<Constraint>& cs,
254     const BitVector& cols_used)
255     : state_(BuildNodeStateMap(root_node,
256                                source_col_by_output_idx,
257                                cs,
258                                cols_used)) {}
259 
FilterAndJoinRecursive(TableNode * node)260 void View::QueryHelper::FilterAndJoinRecursive(TableNode* node) {
261   NodeState& state = *state_.Find(node);
262 
263   // TODO(lalitm): instead of computing the left table straight away here, we
264   // could more intelligently figure out whether doing the join first is more
265   // efficient.
266   state.output = state.output.Filter(state.cs);
267 
268   const Table& left_table = state.output;
269   RowMap left_rm(0, left_table.row_count());
270   for (const auto& child : node->children) {
271     NodeState* child_state = state_.Find(child.get());
272 
273     // If we have no rows, just bail out to minimize work done.
274     if (left_rm.empty())
275       break;
276 
277     // If the table is not used and doesn't remove any rows in the parent, we
278     // can just rely on the default RowMap.
279     if (!child_state->is_used && !child_state->removes_parent_rows)
280       break;
281 
282     // Recurse on the child table so we now the contents of the right table
283     // before we filter any further.
284     FilterAndJoinRecursive(child.get());
285 
286     // If the right table is empty, the left table cannot possibly join
287     // without removing rows.
288     const Table& right_table = child_state->output;
289     if (right_table.row_count() == 0) {
290       left_rm = RowMap();
291       break;
292     }
293 
294     const auto& left_col = *TypedColumn<BaseId>::FromColumn(
295         &state.output.GetColumn(*child->parent_join_col_idx));
296     const auto& right_col = *IdColumn<BaseId>::FromColumn(
297         &child_state->output.GetColumn(*child->join_col_idx));
298 
299     // The core join loop. This function iterates through every row in
300     // the left table and figures out whether to keep it if the row
301     // also exists in the right table. While doing this, it also figures
302     // out the row number in the right table for every row in the left table.
303     std::vector<uint32_t> right_rm_iv;
304     right_rm_iv.reserve(left_rm.size());
305     left_col.overlay().FilterInto(&left_rm, [&](uint32_t idx) {
306       // Check if the right table has the value from the left table.
307       std::optional<uint32_t> opt_idx =
308           right_col.IndexOf(left_col.GetAtIdx(idx));
309 
310       // If it doesn't, return false indicating that this row should be
311       // removed from the left table.
312       if (!opt_idx)
313         return false;
314 
315       // If the row does exist, then keep track of the index of the row
316       // for applying to the right table and return true to also keep this
317       // row in the left table.
318       right_rm_iv.emplace_back(*opt_idx);
319       return true;
320     });
321     child_state->parent_join_rm = RowMap(std::move(right_rm_iv));
322   }
323   state.output = state.output.Apply(std::move(left_rm));
324 }
325 
ApplyRowMapRecursive(TableNode * node,RowMap rm)326 void View::QueryHelper::ApplyRowMapRecursive(TableNode* node, RowMap rm) {
327   NodeState& state = *state_.Find(node);
328   for (const auto& child : node->children) {
329     const NodeState& child_state = *state_.Find(child.get());
330     // If the child table is not used, then we don't need to recurse any
331     // further.
332     if (!child_state.is_used)
333       break;
334     ApplyRowMapRecursive(child.get(),
335                          child_state.parent_join_rm.SelectRows(rm));
336   }
337   state.output = state.output.Apply(std::move(rm));
338 }
339 
BuildNodeStateMap(TableNode * root_node,const std::vector<SourceColumn> & source_col_by_output_idx,const std::vector<Constraint> & cs,const BitVector & cols_used)340 View::QueryHelper::NodeStateMap View::QueryHelper::BuildNodeStateMap(
341     TableNode* root_node,
342     const std::vector<SourceColumn>& source_col_by_output_idx,
343     const std::vector<Constraint>& cs,
344     const BitVector& cols_used) {
345   // Populate the map contains all the nodes in the tree.
346   base::FlatHashMap<const TableNode*, QueryHelper::NodeState> node_state;
347   PostOrderDfs(root_node, [&node_state](TableNode* node) {
348     node_state.Insert(node,
349                       QueryHelper::NodeState{
350                           {}, false, false, node->table->Copy(), RowMap()});
351   });
352 
353   // For each constraint, add the translated constraint to the relevant table's
354   // constraint set.
355   for (const Constraint& c : cs) {
356     const auto& source_col = source_col_by_output_idx[c.col_idx];
357     auto& metadata = *node_state.Find(source_col.first);
358     metadata.cs.emplace_back(Constraint{source_col.second, c.op, c.value});
359   }
360 
361   // For each used column, mark the associated table as being used.
362   for (auto it = cols_used.IterateSetBits(); it; it.Next()) {
363     const auto& source = source_col_by_output_idx[it.index()];
364     node_state.Find(source.first)->is_used = true;
365   }
366 
367   // For each node, figure out whether ti will cause parent rows
368   // to be removed.
369   for (auto it = node_state.GetIterator(); it; ++it) {
370     // The below logic doesn't make sense on the root node.
371     if (it.key() == root_node)
372       continue;
373 
374     // A join will retain (i.e. *not* remove parent rows) if one of the
375     // following is true:
376     //  a) the child (right-side of join) table contains every id
377     //     which could exist in the parent (left-side) table.
378     // TODO(lalitm): add more conditions here.
379     bool join_retains_parent_rows =
380         (it.key()->join_flags & JoinFlag::kIdAlwaysPresent) != 0;
381 
382     // However, if this table has constraints, then we could always remove the
383     // parents rows even if the join would normally retain all rows.
384     it.value().removes_parent_rows =
385         !it.value().cs.empty() || !join_retains_parent_rows;
386   }
387 
388   // Do a DFS on the node tree and propogate up the is_used and
389   // removes_parent_rows boolean. In other words, if a table is used by SQLite,
390   // every ancestor must also be used as we need to join with every table on the
391   // path between the root and used table. Similarily, if a table removes parent
392   // rows, then it does this recursively upwards.
393   PostOrderDfs(root_node, [&node_state](TableNode* node) {
394     NodeState& state = *node_state.Find(node);
395     for (const auto& child : node->children) {
396       const NodeState& child_metadata = *node_state.Find(child.get());
397       state.is_used |= child_metadata.is_used;
398       state.removes_parent_rows |= child_metadata.removes_parent_rows;
399     }
400   });
401 
402   return node_state;
403 }
404 
BuildTable(TableNode * root,const Table::Schema & schema,const std::vector<SourceColumn> & source_col_by_output_idx,const BitVector & cols_used)405 Table View::QueryHelper::BuildTable(
406     TableNode* root,
407     const Table::Schema& schema,
408     const std::vector<SourceColumn>& source_col_by_output_idx,
409     const BitVector& cols_used) {
410   NodeState& root_state = *state_.Find(root);
411 
412   Table output(root->table->string_pool_);
413   output.row_count_ = root_state.output.row_count();
414   output.overlays_.emplace_back(ColumnStorageOverlay(output.row_count_));
415 
416   std::map<std::pair<const TableNode*, uint32_t>, uint32_t> cached_rm;
417   for (auto it = cols_used.IterateAllBits(); it; it.Next()) {
418     const char* col_name = schema.columns[it.index()].name.c_str();
419     if (!it.IsSet()) {
420       output.columns_.emplace_back(
421           Column::DummyColumn(col_name, &output, it.index()));
422       continue;
423     }
424 
425     const auto& source_col = source_col_by_output_idx[it.index()];
426 
427     Table& node_table = state_.Find(source_col.first)->output;
428     const Column& table_col = node_table.GetColumn(source_col.second);
429 
430     auto it_and_inserted = cached_rm.emplace(
431         std::make_pair(source_col.first, table_col.overlay_index()),
432         static_cast<uint32_t>(output.overlays_.size()));
433     if (it_and_inserted.second) {
434       output.overlays_.emplace_back(
435           std::move(node_table.overlays_[table_col.overlay_index()]));
436     }
437 
438     uint32_t rm_idx = it_and_inserted.first->second;
439     output.columns_.emplace_back(
440         Column(table_col, &output, it.index(), rm_idx, col_name));
441   }
442   return output;
443 }
444 
GetColumnCount() const445 uint32_t View::GetColumnCount() const {
446   return static_cast<uint32_t>(schema_.columns.size());
447 }
448 
EstimateRowCount() const449 uint32_t View::EstimateRowCount() const {
450   uint32_t count = 0;
451   PostOrderDfs(root_node_.get(), [&count](TableNode* node) {
452     count = std::max(node->table->row_count(), count);
453   });
454   return count;
455 }
456 
457 }  // namespace trace_processor
458 }  // namespace perfetto
459