1 /*
2 * Copyright (C) 2023 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <algorithm>
18 #include <cstdint>
19 #include <optional>
20 #include <unordered_set>
21 #include <utility>
22 #include <vector>
23
24 #include <sys/types.h>
25 #include "perfetto/base/logging.h"
26 #include "perfetto/trace_processor/basic_types.h"
27 #include "src/trace_processor/containers/bit_vector.h"
28 #include "src/trace_processor/containers/row_map.h"
29 #include "src/trace_processor/db/column/data_layer.h"
30 #include "src/trace_processor/db/column/types.h"
31 #include "src/trace_processor/db/query_executor.h"
32 #include "src/trace_processor/db/table.h"
33
34 namespace perfetto::trace_processor {
35
36 namespace {
37
38 using Range = RowMap::Range;
39 using Indices = column::DataLayerChain::Indices;
40 using OrderedIndices = column::DataLayerChain::OrderedIndices;
41
42 } // namespace
43
ApplyConstraint(const Constraint & c,const column::DataLayerChain & chain,RowMap * rm)44 void QueryExecutor::ApplyConstraint(const Constraint& c,
45 const column::DataLayerChain& chain,
46 RowMap* rm) {
47 // Shortcut of empty row map.
48 uint32_t rm_size = rm->size();
49 if (rm_size == 0)
50 return;
51
52 uint32_t rm_first = rm->Get(0);
53 if (rm_size == 1) {
54 switch (chain.SingleSearch(c.op, c.value, rm_first)) {
55 case SingleSearchResult::kMatch:
56 return;
57 case SingleSearchResult::kNoMatch:
58 rm->Clear();
59 return;
60 case SingleSearchResult::kNeedsFullSearch:
61 break;
62 }
63 }
64
65 switch (chain.ValidateSearchConstraints(c.op, c.value)) {
66 case SearchValidationResult::kNoData:
67 rm->Clear();
68 return;
69 case SearchValidationResult::kAllData:
70 return;
71 case SearchValidationResult::kOk:
72 break;
73 }
74
75 uint32_t rm_last = rm->Get(rm_size - 1);
76 uint32_t range_size = rm_last - rm_first;
77
78 // Always prefer linear search if on a range *except* when the range is small
79 // but the last element of the range is large: this will cause a big bitvector
80 // to be created which negates the benefits of using linear search over
81 // index search.
82 bool disallows_index_search = rm->IsRange() && rm_last < range_size * 100;
83
84 // If the number of elements in the rowmap is small or the number of
85 // elements is less than 1/10th of the range, use indexed filtering.
86 // TODO(b/283763282): use Overlay estimations.
87 bool prefers_index_search =
88 rm->IsIndexVector() || rm_size < 1024 || rm_size * 10 < range_size;
89
90 if (!disallows_index_search && prefers_index_search) {
91 IndexSearch(c, chain, rm);
92 return;
93 }
94 LinearSearch(c, chain, rm);
95 }
96
LinearSearch(const Constraint & c,const column::DataLayerChain & chain,RowMap * rm)97 void QueryExecutor::LinearSearch(const Constraint& c,
98 const column::DataLayerChain& chain,
99 RowMap* rm) {
100 // TODO(b/283763282): Align these to word boundaries.
101 Range bounds(rm->Get(0), rm->Get(rm->size() - 1) + 1);
102
103 // Search the storage.
104 RangeOrBitVector res = chain.Search(c.op, c.value, bounds);
105 if (rm->IsRange()) {
106 if (res.IsRange()) {
107 Range range = std::move(res).TakeIfRange();
108 *rm = RowMap(range.start, range.end);
109 } else {
110 // The BitVector was already limited on the RowMap when created, so we
111 // can take it as it is.
112 *rm = RowMap(std::move(res).TakeIfBitVector());
113 }
114 return;
115 }
116
117 if (res.IsRange()) {
118 Range range = std::move(res).TakeIfRange();
119 rm->Intersect(RowMap(range.start, range.end));
120 return;
121 }
122 rm->Intersect(RowMap(std::move(res).TakeIfBitVector()));
123 }
124
IndexSearch(const Constraint & c,const column::DataLayerChain & chain,RowMap * rm)125 void QueryExecutor::IndexSearch(const Constraint& c,
126 const column::DataLayerChain& chain,
127 RowMap* rm) {
128 // Create outmost TableIndexVector.
129 std::vector<uint32_t> table_indices = std::move(*rm).TakeAsIndexVector();
130
131 Indices indices = Indices::Create(table_indices, Indices::State::kMonotonic);
132 chain.IndexSearch(c.op, c.value, indices);
133
134 PERFETTO_DCHECK(indices.tokens.size() <= table_indices.size());
135 for (uint32_t i = 0; i < indices.tokens.size(); ++i) {
136 table_indices[i] = indices.tokens[i].payload;
137 }
138 table_indices.resize(indices.tokens.size());
139 PERFETTO_DCHECK(std::is_sorted(table_indices.begin(), table_indices.end()));
140 *rm = RowMap(std::move(table_indices));
141 }
142
SortLegacy(const Table * table,const std::vector<Order> & ob,std::vector<uint32_t> & out)143 void QueryExecutor::SortLegacy(const Table* table,
144 const std::vector<Order>& ob,
145 std::vector<uint32_t>& out) {
146 // Setup the sort token payload to match the input vector of indices. The
147 // value of the payload will be untouched by the algorithm even while the
148 // order changes to match the ordering defined by the input constraint set.
149 std::vector<Token> rows(out.size());
150 for (uint32_t i = 0; i < out.size(); ++i) {
151 rows[i].payload = out[i];
152 }
153
154 // As our data is columnar, it's always more efficient to sort one column
155 // at a time rather than try and sort lexiographically all at once.
156 // To preserve correctness, we need to stably sort the index vector once
157 // for each order by in *reverse* order. Reverse order is important as it
158 // preserves the lexiographical property.
159 //
160 // For example, suppose we have the following:
161 // Table {
162 // Column x;
163 // Column y
164 // Column z;
165 // }
166 //
167 // Then, to sort "y asc, x desc", we could do one of two things:
168 // 1) sort the index vector all at once and on each index, we compare
169 // y then z. This is slow as the data is columnar and we need to
170 // repeatedly branch inside each column.
171 // 2) we can stably sort first on x desc and then sort on y asc. This will
172 // first put all the x in the correct order such that when we sort on
173 // y asc, we will have the correct order of x where y is the same (since
174 // the sort is stable).
175 //
176 // TODO(lalitm): it is possible that we could sort the last constraint (i.e.
177 // the first constraint in the below loop) in a non-stable way. However,
178 // this is more subtle than it appears as we would then need special
179 // handling where there are order bys on a column which is already sorted
180 // (e.g. ts, id). Investigate whether the performance gains from this are
181 // worthwhile. This also needs changes to the constraint modification logic
182 // in DbSqliteTable which currently eliminates constraints on sorted
183 // columns.
184 for (auto it = ob.rbegin(); it != ob.rend(); ++it) {
185 // Reset the index to the payload at the start of each iote
186 for (uint32_t i = 0; i < out.size(); ++i) {
187 rows[i].index = rows[i].payload;
188 }
189 table->ChainForColumn(it->col_idx)
190 .StableSort(
191 rows.data(), rows.data() + rows.size(),
192 it->desc ? SortDirection::kDescending : SortDirection::kAscending);
193 }
194
195 // Recapture the payload from each of the sort tokens whose order now
196 // indicates the order
197 for (uint32_t i = 0; i < out.size(); ++i) {
198 out[i] = rows[i].payload;
199 }
200 }
201
BoundedColumnFilterForTesting(const Constraint & c,const column::DataLayerChain & col,RowMap * rm)202 void QueryExecutor::BoundedColumnFilterForTesting(
203 const Constraint& c,
204 const column::DataLayerChain& col,
205 RowMap* rm) {
206 LinearSearch(c, col, rm);
207 }
208
IndexedColumnFilterForTesting(const Constraint & c,const column::DataLayerChain & col,RowMap * rm)209 void QueryExecutor::IndexedColumnFilterForTesting(
210 const Constraint& c,
211 const column::DataLayerChain& col,
212 RowMap* rm) {
213 IndexSearch(c, col, rm);
214 }
215
216 } // namespace perfetto::trace_processor
217