• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2023 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "src/trace_processor/db/column/arrangement_overlay.h"
18 
19 #include <algorithm>
20 #include <cstdint>
21 #include <memory>
22 #include <optional>
23 #include <unordered_set>
24 #include <utility>
25 #include <vector>
26 
27 #include "perfetto/base/logging.h"
28 #include "perfetto/trace_processor/basic_types.h"
29 #include "src/trace_processor/containers/bit_vector.h"
30 #include "src/trace_processor/db/column/data_layer.h"
31 #include "src/trace_processor/db/column/types.h"
32 #include "src/trace_processor/tp_metatrace.h"
33 
34 #include "protos/perfetto/trace_processor/metatrace_categories.pbzero.h"
35 #include "protos/perfetto/trace_processor/serialization.pbzero.h"
36 
37 namespace perfetto::trace_processor::column {
38 
ChainImpl(std::unique_ptr<DataLayerChain> inner,const std::vector<uint32_t> * arrangement,Indices::State arrangement_state,bool does_arrangement_order_storage)39 ArrangementOverlay::ChainImpl::ChainImpl(
40     std::unique_ptr<DataLayerChain> inner,
41     const std::vector<uint32_t>* arrangement,
42     Indices::State arrangement_state,
43     bool does_arrangement_order_storage)
44     : inner_(std::move(inner)),
45       arrangement_(arrangement),
46       arrangement_state_(arrangement_state),
47       does_arrangement_order_storage_(does_arrangement_order_storage) {}
48 
SingleSearch(FilterOp op,SqlValue sql_val,uint32_t index) const49 SingleSearchResult ArrangementOverlay::ChainImpl::SingleSearch(
50     FilterOp op,
51     SqlValue sql_val,
52     uint32_t index) const {
53   return inner_->SingleSearch(op, sql_val, (*arrangement_)[index]);
54 }
55 
ValidateSearchConstraints(FilterOp op,SqlValue value) const56 SearchValidationResult ArrangementOverlay::ChainImpl::ValidateSearchConstraints(
57     FilterOp op,
58     SqlValue value) const {
59   return inner_->ValidateSearchConstraints(op, value);
60 }
61 
SearchValidated(FilterOp op,SqlValue sql_val,Range in) const62 RangeOrBitVector ArrangementOverlay::ChainImpl::SearchValidated(
63     FilterOp op,
64     SqlValue sql_val,
65     Range in) const {
66   PERFETTO_TP_TRACE(metatrace::Category::DB,
67                     "ArrangementOverlay::ChainImpl::Search");
68 
69   if (does_arrangement_order_storage_ && op != FilterOp::kGlob &&
70       op != FilterOp::kRegex) {
71     OrderedIndices indices{arrangement_->data() + in.start, in.size(),
72                            arrangement_state_};
73     if (op == FilterOp::kNe) {
74       // Do an equality search and "invert" the range.
75       Range inner_res =
76           inner_->OrderedIndexSearchValidated(FilterOp::kEq, sql_val, indices);
77       BitVector bv(in.start);
78       bv.Resize(in.start + inner_res.start, true);
79       bv.Resize(in.start + inner_res.end, false);
80       bv.Resize(in.end, true);
81       return RangeOrBitVector(std::move(bv));
82     }
83     Range inner_res = inner_->OrderedIndexSearchValidated(op, sql_val, indices);
84     return RangeOrBitVector(
85         Range(in.start + inner_res.start, in.start + inner_res.end));
86   }
87 
88   const auto& arrangement = *arrangement_;
89   PERFETTO_DCHECK(in.end <= arrangement.size());
90   const auto [min_i, max_i] =
91       std::minmax_element(arrangement.begin() + static_cast<int32_t>(in.start),
92                           arrangement.begin() + static_cast<int32_t>(in.end));
93 
94   auto storage_result =
95       inner_->SearchValidated(op, sql_val, Range(*min_i, *max_i + 1));
96   BitVector::Builder builder(in.end, in.start);
97   if (storage_result.IsRange()) {
98     Range storage_range = std::move(storage_result).TakeIfRange();
99     for (uint32_t i = in.start; i < in.end; ++i) {
100       builder.Append(storage_range.Contains(arrangement[i]));
101     }
102   } else {
103     BitVector storage_bitvector = std::move(storage_result).TakeIfBitVector();
104     PERFETTO_DCHECK(storage_bitvector.size() == *max_i + 1);
105 
106     // After benchmarking, it turns out this complexity *is* actually worthwhile
107     // and has a noticable impact on the performance of this function in real
108     // world tables.
109 
110     // Fast path: we compare as many groups of 64 elements as we can.
111     // This should be very easy for the compiler to auto-vectorize.
112     const uint32_t* arrangement_idx = arrangement.data() + in.start;
113     uint32_t fast_path_elements = builder.BitsInCompleteWordsUntilFull();
114     for (uint32_t i = 0; i < fast_path_elements; i += BitVector::kBitsInWord) {
115       uint64_t word = 0;
116       // This part should be optimised by SIMD and is expected to be fast.
117       for (uint32_t k = 0; k < BitVector::kBitsInWord; ++k, ++arrangement_idx) {
118         bool comp_result = storage_bitvector.IsSet(*arrangement_idx);
119         word |= static_cast<uint64_t>(comp_result) << k;
120       }
121       builder.AppendWord(word);
122     }
123 
124     // Slow path: we compare <64 elements and append to fill the Builder.
125     uint32_t back_elements = builder.BitsUntilFull();
126     for (uint32_t i = 0; i < back_elements; ++i, ++arrangement_idx) {
127       builder.Append(storage_bitvector.IsSet(*arrangement_idx));
128     }
129   }
130   return RangeOrBitVector(std::move(builder).Build());
131 }
132 
IndexSearchValidated(FilterOp op,SqlValue sql_val,Indices & indices) const133 void ArrangementOverlay::ChainImpl::IndexSearchValidated(
134     FilterOp op,
135     SqlValue sql_val,
136     Indices& indices) const {
137   PERFETTO_TP_TRACE(metatrace::Category::DB,
138                     "ArrangementOverlay::ChainImpl::IndexSearch");
139 
140   for (auto& i : indices.tokens) {
141     i.index = (*arrangement_)[i.index];
142   }
143   // If the indices state is monotonic, we can just pass the arrangement's
144   // state.
145   indices.state = indices.state == Indices::State::kMonotonic
146                       ? arrangement_state_
147                       : Indices::State::kNonmonotonic;
148   return inner_->IndexSearchValidated(op, sql_val, indices);
149 }
150 
StableSort(SortToken * start,SortToken * end,SortDirection direction) const151 void ArrangementOverlay::ChainImpl::StableSort(SortToken* start,
152                                                SortToken* end,
153                                                SortDirection direction) const {
154   for (SortToken* it = start; it != end; ++it) {
155     it->index = (*arrangement_)[it->index];
156   }
157   inner_->StableSort(start, end, direction);
158 }
159 
Distinct(Indices & indices) const160 void ArrangementOverlay::ChainImpl::Distinct(Indices& indices) const {
161   PERFETTO_TP_TRACE(metatrace::Category::DB,
162                     "ArrangementOverlay::ChainImpl::Distinct");
163   // TODO(mayzner): Utilize `does_arrangmeent_order_storage_`.
164   std::unordered_set<uint32_t> s;
165   indices.tokens.erase(
166       std::remove_if(indices.tokens.begin(), indices.tokens.end(),
167                      [this, &s](Token& idx) {
168                        if (s.insert(idx.index).second) {
169                          idx.index = (*arrangement_)[idx.index];
170                          return false;
171                        }
172                        return true;
173                      }),
174       indices.tokens.end());
175   inner_->Distinct(indices);
176 }
177 
MaxElement(Indices & indices) const178 std::optional<Token> ArrangementOverlay::ChainImpl::MaxElement(
179     Indices& indices) const {
180   PERFETTO_TP_TRACE(metatrace::Category::DB,
181                     "ArrangementOverlay::ChainImpl::MaxElement");
182   for (auto& i : indices.tokens) {
183     i.index = (*arrangement_)[i.index];
184   }
185   // If the indices state is monotonic, we can just pass the arrangement's
186   // state.
187   indices.state = indices.state == Indices::State::kMonotonic
188                       ? arrangement_state_
189                       : Indices::State::kNonmonotonic;
190   return inner_->MaxElement(indices);
191 }
192 
MinElement(Indices & indices) const193 std::optional<Token> ArrangementOverlay::ChainImpl::MinElement(
194     Indices& indices) const {
195   PERFETTO_TP_TRACE(metatrace::Category::DB,
196                     "ArrangementOverlay::ChainImpl::MinElement");
197   for (auto& i : indices.tokens) {
198     i.index = (*arrangement_)[i.index];
199   }
200   // If the indices state is monotonic, we can just pass the arrangement's
201   // state.
202   indices.state = indices.state == Indices::State::kMonotonic
203                       ? arrangement_state_
204                       : Indices::State::kNonmonotonic;
205   return inner_->MinElement(indices);
206 }
207 
Get_AvoidUsingBecauseSlow(uint32_t index) const208 SqlValue ArrangementOverlay::ChainImpl::Get_AvoidUsingBecauseSlow(
209     uint32_t index) const {
210   return inner_->Get_AvoidUsingBecauseSlow((*arrangement_)[index]);
211 }
212 
Serialize(StorageProto * storage) const213 void ArrangementOverlay::ChainImpl::Serialize(StorageProto* storage) const {
214   auto* arrangement_overlay = storage->set_arrangement_overlay();
215   arrangement_overlay->set_values(
216       reinterpret_cast<const uint8_t*>(arrangement_->data()),
217       sizeof(uint32_t) * arrangement_->size());
218   inner_->Serialize(arrangement_overlay->set_storage());
219 }
220 
221 }  // namespace perfetto::trace_processor::column
222