1 /*
2 * Copyright (C) 2023 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "src/trace_processor/db/runtime_table.h"
18
19 #include <algorithm>
20 #include <cinttypes>
21 #include <cstdint>
22 #include <functional>
23 #include <limits>
24 #include <memory>
25 #include <optional>
26 #include <set>
27 #include <string>
28 #include <utility>
29 #include <variant>
30 #include <vector>
31
32 #include "perfetto/base/compiler.h"
33 #include "perfetto/base/logging.h"
34 #include "perfetto/base/status.h"
35 #include "perfetto/ext/base/status_or.h"
36 #include "perfetto/trace_processor/iterator.h"
37 #include "perfetto/trace_processor/ref_counted.h"
38 #include "src/trace_processor/containers/bit_vector.h"
39 #include "src/trace_processor/containers/string_pool.h"
40 #include "src/trace_processor/db/column.h"
41 #include "src/trace_processor/db/column/data_layer.h"
42 #include "src/trace_processor/db/column/id_storage.h"
43 #include "src/trace_processor/db/column/null_overlay.h"
44 #include "src/trace_processor/db/column/numeric_storage.h"
45 #include "src/trace_processor/db/column/range_overlay.h"
46 #include "src/trace_processor/db/column/selector_overlay.h"
47 #include "src/trace_processor/db/column/string_storage.h"
48 #include "src/trace_processor/db/column/types.h"
49 #include "src/trace_processor/db/column_storage.h"
50 #include "src/trace_processor/db/column_storage_overlay.h"
51
52 namespace perfetto::trace_processor {
53 namespace {
54
55 template <typename T, typename U>
Fill(uint32_t leading_nulls,U value)56 T Fill(uint32_t leading_nulls, U value) {
57 T res;
58 for (uint32_t i = 0; i < leading_nulls; ++i) {
59 res.Append(value);
60 }
61 return res;
62 }
63
IsPerfectlyRepresentableAsDouble(int64_t res)64 bool IsPerfectlyRepresentableAsDouble(int64_t res) {
65 static constexpr int64_t kMaxDoubleRepresentible = 1ull << 53;
66 return res >= -kMaxDoubleRepresentible && res <= kMaxDoubleRepresentible;
67 }
68
IsStorageNotIntNorDouble(const RuntimeTable::VariantStorage & col)69 bool IsStorageNotIntNorDouble(const RuntimeTable::VariantStorage& col) {
70 return std::get_if<RuntimeTable::IntStorage>(&col) == nullptr &&
71 std::get_if<RuntimeTable::DoubleStorage>(&col) == nullptr;
72 }
73
CreateNonNullableIntsColumn(uint32_t col_idx,const char * col_name,ColumnStorage<int64_t> * ints_storage,std::vector<RefPtr<column::DataLayer>> & storage_layers,std::vector<RefPtr<column::DataLayer>> & overlay_layers,std::vector<ColumnLegacy> & legacy_columns,std::vector<ColumnStorageOverlay> & legacy_overlays)74 void CreateNonNullableIntsColumn(
75 uint32_t col_idx,
76 const char* col_name,
77 ColumnStorage<int64_t>* ints_storage,
78 std::vector<RefPtr<column::DataLayer>>& storage_layers,
79 std::vector<RefPtr<column::DataLayer>>& overlay_layers,
80 std::vector<ColumnLegacy>& legacy_columns,
81 std::vector<ColumnStorageOverlay>& legacy_overlays) {
82 const std::vector<int64_t>& values = ints_storage->vector();
83
84 // Looking for the iterator to the first value that is less or equal to the
85 // previous value. The values before are therefore strictly monotonic - each
86 // is greater than the previous one.
87 bool is_monotonic = true;
88 bool is_sorted = true;
89 for (uint32_t i = 1; i < values.size() && is_sorted; i++) {
90 is_monotonic = is_monotonic && values[i - 1] < values[i];
91 is_sorted = values[i - 1] <= values[i];
92 }
93
94 // The special treatement for Id columns makes no sense for empty or
95 // single element indices. Those should be treated as standard int
96 // column.
97
98 // We expect id column to:
99 // - be strictly monotonic.
100 bool is_id = is_monotonic;
101 // - have more than 1 element.
102 is_id = is_id && values.size() > 1;
103 // - have first elements smaller then 2^20, mostly to prevent timestamps
104 // columns from becoming Id columns.
105 is_id = is_id && values.front() < 1 << 20;
106 // - have `uint32_t` values.
107 is_id = is_id && values.front() >= std::numeric_limits<uint32_t>::min() &&
108 values.back() < std::numeric_limits<uint32_t>::max();
109 // - have on average more than 1 set bit per int64_t (over 1/64 density)
110 is_id = is_id && static_cast<uint32_t>(values.back()) < 64 * values.size();
111
112 if (is_id) {
113 // The column is an Id column.
114 storage_layers[col_idx].reset(new column::IdStorage());
115
116 legacy_overlays.emplace_back(BitVector::FromSortedIndexVector(values));
117 overlay_layers.emplace_back().reset(new column::SelectorOverlay(
118 legacy_overlays.back().row_map().GetIfBitVector()));
119
120 legacy_columns.push_back(ColumnLegacy::IdColumn(
121 col_idx, static_cast<uint32_t>(legacy_overlays.size() - 1), col_name,
122 ColumnLegacy::kIdFlags));
123 return;
124 }
125
126 uint32_t flags =
127 is_sorted ? ColumnLegacy::Flag::kNonNull | ColumnLegacy::Flag::kSorted
128 : ColumnLegacy::Flag::kNonNull;
129
130 legacy_columns.emplace_back(col_name, ints_storage, flags, col_idx, 0);
131 storage_layers[col_idx].reset(new column::NumericStorage<int64_t>(
132 &values, ColumnType::kInt64, is_sorted));
133 }
134
135 } // namespace
136
RuntimeTable(StringPool * pool,uint32_t row_count,std::vector<ColumnLegacy> columns,std::vector<ColumnStorageOverlay> overlays,std::vector<RefPtr<column::DataLayer>> storage_layers,std::vector<RefPtr<column::DataLayer>> null_layers,std::vector<RefPtr<column::DataLayer>> overlay_layers)137 RuntimeTable::RuntimeTable(
138 StringPool* pool,
139 uint32_t row_count,
140 std::vector<ColumnLegacy> columns,
141 std::vector<ColumnStorageOverlay> overlays,
142 std::vector<RefPtr<column::DataLayer>> storage_layers,
143 std::vector<RefPtr<column::DataLayer>> null_layers,
144 std::vector<RefPtr<column::DataLayer>> overlay_layers)
145 : Table(pool, row_count, std::move(columns), std::move(overlays)) {
146 OnConstructionCompleted(std::move(storage_layers), std::move(null_layers),
147 std::move(overlay_layers));
148 }
149
150 RuntimeTable::~RuntimeTable() = default;
151
Builder(StringPool * pool,std::vector<std::string> col_names)152 RuntimeTable::Builder::Builder(StringPool* pool,
153 std::vector<std::string> col_names)
154 : string_pool_(pool), col_names_(std::move(col_names)) {
155 for (uint32_t i = 0; i < col_names_.size(); i++) {
156 storage_.emplace_back(std::make_unique<VariantStorage>());
157 }
158 }
159
AddNull(uint32_t idx)160 base::Status RuntimeTable::Builder::AddNull(uint32_t idx) {
161 auto* col = storage_[idx].get();
162 PERFETTO_DCHECK(IsStorageNotIntNorDouble(*col));
163 if (auto* leading_nulls = std::get_if<uint32_t>(col)) {
164 (*leading_nulls)++;
165 } else if (auto* ints = std::get_if<NullIntStorage>(col)) {
166 ints->Append(std::nullopt);
167 } else if (auto* strings = std::get_if<StringStorage>(col)) {
168 strings->Append(StringPool::Id::Null());
169 } else if (auto* doubles = std::get_if<NullDoubleStorage>(col)) {
170 doubles->Append(std::nullopt);
171 } else {
172 PERFETTO_FATAL("Unexpected column type");
173 }
174 return base::OkStatus();
175 }
176
AddInteger(uint32_t idx,int64_t res)177 base::Status RuntimeTable::Builder::AddInteger(uint32_t idx, int64_t res) {
178 auto* col = storage_[idx].get();
179 PERFETTO_DCHECK(IsStorageNotIntNorDouble(*col));
180 if (auto* leading_nulls_ptr = std::get_if<uint32_t>(col)) {
181 *col = Fill<NullIntStorage>(*leading_nulls_ptr, std::nullopt);
182 }
183 if (auto* doubles = std::get_if<NullDoubleStorage>(col)) {
184 if (!IsPerfectlyRepresentableAsDouble(res)) {
185 return base::ErrStatus("Column %s contains %" PRId64
186 " which cannot be represented as a double",
187 col_names_[idx].c_str(), res);
188 }
189 doubles->Append(static_cast<double>(res));
190 return base::OkStatus();
191 }
192 auto* ints = std::get_if<NullIntStorage>(col);
193 if (!ints) {
194 return base::ErrStatus("Column %s does not have consistent types",
195 col_names_[idx].c_str());
196 }
197 ints->Append(res);
198 return base::OkStatus();
199 }
200
AddFloat(uint32_t idx,double res)201 base::Status RuntimeTable::Builder::AddFloat(uint32_t idx, double res) {
202 auto* col = storage_[idx].get();
203 PERFETTO_DCHECK(IsStorageNotIntNorDouble(*col));
204 if (auto* leading_nulls_ptr = std::get_if<uint32_t>(col)) {
205 *col = Fill<NullDoubleStorage>(*leading_nulls_ptr, std::nullopt);
206 }
207 if (auto* ints = std::get_if<NullIntStorage>(col)) {
208 NullDoubleStorage storage;
209 for (uint32_t i = 0; i < ints->size(); ++i) {
210 std::optional<int64_t> int_val = ints->Get(i);
211 if (!int_val) {
212 storage.Append(std::nullopt);
213 continue;
214 }
215 if (int_val && !IsPerfectlyRepresentableAsDouble(*int_val)) {
216 return base::ErrStatus("Column %s contains %" PRId64
217 " which cannot be represented as a double",
218 col_names_[idx].c_str(), *int_val);
219 }
220 storage.Append(static_cast<double>(*int_val));
221 }
222 *col = std::move(storage);
223 }
224 auto* doubles = std::get_if<NullDoubleStorage>(col);
225 if (!doubles) {
226 return base::ErrStatus("Column %s does not have consistent types",
227 col_names_[idx].c_str());
228 }
229 doubles->Append(res);
230 return base::OkStatus();
231 }
232
AddText(uint32_t idx,const char * ptr)233 base::Status RuntimeTable::Builder::AddText(uint32_t idx, const char* ptr) {
234 auto* col = storage_[idx].get();
235 PERFETTO_DCHECK(IsStorageNotIntNorDouble(*col));
236 if (auto* leading_nulls_ptr = std::get_if<uint32_t>(col)) {
237 *col = Fill<StringStorage>(*leading_nulls_ptr, StringPool::Id::Null());
238 }
239 auto* strings = std::get_if<StringStorage>(col);
240 if (!strings) {
241 return base::ErrStatus("Column %s does not have consistent types",
242 col_names_[idx].c_str());
243 }
244 strings->Append(string_pool_->InternString(ptr));
245 return base::OkStatus();
246 }
247
Build(uint32_t rows)248 base::StatusOr<std::unique_ptr<RuntimeTable>> RuntimeTable::Builder::Build(
249 uint32_t rows) && {
250 std::vector<RefPtr<column::DataLayer>> storage_layers(col_names_.size() + 1);
251 std::vector<RefPtr<column::DataLayer>> null_layers(col_names_.size() + 1);
252
253 std::vector<ColumnLegacy> legacy_columns;
254 std::vector<ColumnStorageOverlay> legacy_overlays;
255
256 // |overlay_layers| might use the RowMaps used by |legacy_overlays| and access
257 // them by fetching the pointer to the RowMap inside overlay. We need to make
258 // sure that those pointers will not change, hence we need to make sure that
259 // the vector will not resize. In the current implementation there is at most
260 // one overlay per column.
261 legacy_overlays.reserve(col_names_.size() + 1);
262 legacy_overlays.emplace_back(rows);
263 std::vector<RefPtr<column::DataLayer>> overlay_layers(1);
264
265 for (uint32_t i = 0; i < col_names_.size(); ++i) {
266 auto* col = storage_[i].get();
267 std::unique_ptr<column::DataLayerChain> chain;
268 PERFETTO_DCHECK(IsStorageNotIntNorDouble(*col));
269 if (auto* leading_nulls = std::get_if<uint32_t>(col)) {
270 PERFETTO_CHECK(*leading_nulls == rows);
271 *col = Fill<NullIntStorage>(*leading_nulls, std::nullopt);
272 }
273
274 if (auto* ints = std::get_if<NullIntStorage>(col)) {
275 // The `ints` column
276 PERFETTO_CHECK(ints->size() == rows);
277
278 if (ints->non_null_size() == ints->size()) {
279 // The column doesn't have any nulls so we construct a new nonnullable
280 // column.
281 *col = IntStorage::CreateFromAssertNonNull(std::move(*ints));
282 CreateNonNullableIntsColumn(
283 i, col_names_[i].c_str(), std::get_if<IntStorage>(col),
284 storage_layers, overlay_layers, legacy_columns, legacy_overlays);
285 } else {
286 // Nullable ints column.
287 legacy_columns.emplace_back(col_names_[i].c_str(), ints,
288 ColumnLegacy::Flag::kNoFlag, i, 0);
289 storage_layers[i].reset(new column::NumericStorage<int64_t>(
290 &ints->non_null_vector(), ColumnType::kInt64, false));
291 null_layers[i].reset(
292 new column::NullOverlay(&ints->non_null_bit_vector()));
293 }
294
295 // The doubles column.
296 } else if (auto* doubles = std::get_if<NullDoubleStorage>(col)) {
297 PERFETTO_CHECK(doubles->size() == rows);
298
299 if (doubles->non_null_size() == doubles->size()) {
300 // The column is not nullable.
301 *col = DoubleStorage::CreateFromAssertNonNull(std::move(*doubles));
302
303 auto* non_null_doubles = std::get_if<DoubleStorage>(col);
304 bool is_sorted = std::is_sorted(non_null_doubles->vector().begin(),
305 non_null_doubles->vector().end());
306 uint32_t flags = is_sorted ? ColumnLegacy::Flag::kNonNull |
307 ColumnLegacy::Flag::kSorted
308 : ColumnLegacy::Flag::kNonNull;
309 legacy_columns.emplace_back(col_names_[i].c_str(), non_null_doubles,
310 flags, i, 0);
311 storage_layers[i].reset(new column::NumericStorage<double>(
312 &non_null_doubles->vector(), ColumnType::kDouble, is_sorted));
313
314 } else {
315 // The column is nullable.
316 legacy_columns.emplace_back(col_names_[i].c_str(), doubles,
317 ColumnLegacy::Flag::kNoFlag, i, 0);
318 storage_layers[i].reset(new column::NumericStorage<double>(
319 &doubles->non_null_vector(), ColumnType::kDouble, false));
320 null_layers[i].reset(
321 new column::NullOverlay(&doubles->non_null_bit_vector()));
322 }
323
324 } else if (auto* strings = std::get_if<StringStorage>(col)) {
325 // The `strings` column.
326 PERFETTO_CHECK(strings->size() == rows);
327 legacy_columns.emplace_back(col_names_[i].c_str(), strings,
328 ColumnLegacy::Flag::kNonNull, i, 0);
329 storage_layers[i].reset(
330 new column::StringStorage(string_pool_, &strings->vector()));
331
332 } else {
333 PERFETTO_FATAL("Unexpected column type");
334 }
335 }
336 legacy_columns.push_back(ColumnLegacy::IdColumn(
337 static_cast<uint32_t>(legacy_columns.size()), 0, "_auto_id",
338 ColumnLegacy::kIdFlags | ColumnLegacy::Flag::kHidden));
339 storage_layers.back().reset(new column::IdStorage());
340
341 auto table = std::make_unique<RuntimeTable>(
342 string_pool_, rows, std::move(legacy_columns), std::move(legacy_overlays),
343 std::move(storage_layers), std::move(null_layers),
344 std::move(overlay_layers));
345 table->storage_ = std::move(storage_);
346 table->col_names_ = std::move(col_names_);
347
348 table->schema_.columns.reserve(table->columns().size());
349 for (const auto& col : table->columns()) {
350 table->schema_.columns.emplace_back(
351 Schema::Column{col.name(), col.type(), col.IsId(), col.IsSorted(),
352 col.IsHidden(), col.IsSetId()});
353 }
354 return {std::move(table)};
355 }
356
357 } // namespace perfetto::trace_processor
358