1 /**
2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #ifndef LIBPANDABASE_UTILS_BIT_TABLE_H
17 #define LIBPANDABASE_UTILS_BIT_TABLE_H
18
19 #include "utils/arena_containers.h"
20 #include "utils/bit_memory_region.h"
21 #include "utils/bit_memory_stream.h"
22 #include "utils/bit_vector.h"
23 #include "utils/hash.h"
24 #include <iomanip>
25 #include "securec.h"
26
27 namespace panda {
28
29 class VarintPack {
30 public:
31 static constexpr size_t INLINE_BITS = 4;
32 static constexpr size_t INLINE_MAX = 11;
33
34 template <size_t N>
Read(BitMemoryStreamIn * stream)35 static std::array<uint32_t, N> Read(BitMemoryStreamIn *stream)
36 {
37 static_assert(sizeof(uint64_t) * BITS_PER_BYTE >= N * INLINE_BITS);
38 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
39 std::array<uint32_t, N> values;
40 auto data = stream->Read<uint64_t>(N * INLINE_BITS);
41 for (size_t i = 0; i < N; i++) {
42 values[i] = ExtractBits(data, i * INLINE_BITS, INLINE_BITS);
43 }
44 for (size_t i = 0; i < N; i++) {
45 if (UNLIKELY(values[i] > INLINE_MAX)) {
46 values[i] = stream->Read<uint32_t>((values[i] - INLINE_MAX) * BITS_PER_BYTE);
47 }
48 }
49 return values;
50 }
51
52 template <size_t N, typename Container>
Write(BitMemoryStreamOut<Container> & stream,const std::array<uint32_t,N> & data)53 static void Write(BitMemoryStreamOut<Container> &stream, const std::array<uint32_t, N> &data)
54 {
55 for (auto value : data) {
56 if (value > INLINE_MAX) {
57 stream.Write(INLINE_MAX + BitsToBytesRoundUp(MinimumBitsToStore(value)), INLINE_BITS);
58 } else {
59 stream.Write(value, INLINE_BITS);
60 }
61 }
62 for (auto value : data) {
63 if (value > INLINE_MAX) {
64 stream.Write(value, BitsToBytesRoundUp(MinimumBitsToStore(value)) * BITS_PER_BYTE);
65 }
66 }
67 }
68 };
69
70 template <typename>
71 class BitTable;
72
73 template <size_t NumColumns, typename Accessor, bool reverse_iteration = false>
74 class BitTableRow : public std::iterator<std::random_access_iterator_tag, Accessor, int32_t, Accessor *, Accessor &> {
75 using BitTableType = BitTable<Accessor>;
76
77 public:
78 static constexpr size_t NUM_COLUMNS = NumColumns;
79 static constexpr uint32_t NO_VALUE = BitTableType::NO_VALUE;
80
81 using Reversed = BitTableRow<NumColumns, Accessor, !reverse_iteration>;
82
83 BitTableRow() = default;
BitTableRow(const BitTableType * table,int row_index)84 BitTableRow(const BitTableType *table, int row_index) : table_(table), row_index_(row_index) {}
BitTableRow(Reversed * rhs)85 explicit BitTableRow(Reversed *rhs) : table_(rhs->table_), row_index_(rhs->row_index_) {}
86
GetRow()87 uint32_t GetRow() const
88 {
89 return row_index_;
90 }
91
GetColumnStr(size_t column)92 std::string GetColumnStr(size_t column) const
93 {
94 if (Get(column) == NO_VALUE) {
95 return "-";
96 }
97 return std::to_string(Get(column));
98 }
99
GetAccessor()100 Accessor GetAccessor()
101 {
102 return Accessor(this);
103 }
104
Get(size_t index)105 uint32_t Get(size_t index) const
106 {
107 return table_->ReadColumn(row_index_, index);
108 }
109
Has(size_t index)110 bool Has(size_t index) const
111 {
112 return Get(index) != BitTableType::NO_VALUE;
113 }
114
ColumnsCount()115 size_t ColumnsCount() const
116 {
117 return table_->GetColumnsCount();
118 }
119
IsValid()120 bool IsValid() const
121 {
122 return row_index_ != -1;
123 }
124
125 bool operator==(const BitTableRow &rhs) const
126 {
127 return table_ == rhs.table_ && row_index_ == rhs.row_index_;
128 }
129
130 bool operator!=(const BitTableRow &rhs) const
131 {
132 return !(operator==(rhs));
133 }
134
135 ~BitTableRow() = default;
136
137 DEFAULT_COPY_SEMANTIC(BitTableRow);
138 DEFAULT_NOEXCEPT_MOVE_SEMANTIC(BitTableRow);
139
140 protected:
GetTable()141 auto GetTable() const
142 {
143 return table_;
144 }
145
146 private:
147 const BitTableType *table_ {nullptr};
148 int row_index_ {-1};
149
150 friend Reversed;
151 template <typename, bool>
152 friend class BitTableIterator;
153 };
154
155 template <typename Accessor, bool reverse_iteration = false>
156 class BitTableIterator
157 : public std::iterator<std::random_access_iterator_tag, Accessor, int32_t, Accessor *, Accessor &> {
158 public:
BitTableIterator(const typename Accessor::BitTableType * table,int row_index)159 BitTableIterator(const typename Accessor::BitTableType *table, int row_index) : row_(table, row_index) {}
BitTableIterator(Accessor & row)160 explicit BitTableIterator(Accessor &row) : row_(row) {}
161
162 BitTableIterator &operator++()
163 {
164 if constexpr (reverse_iteration) { // NOLINT
165 row_.row_index_--;
166 } else { // NOLINT
167 row_.row_index_++;
168 }
169 return *this;
170 }
171
172 BitTableIterator &operator--()
173 {
174 if constexpr (reverse_iteration) { // NOLINT
175 row_.row_index_++;
176 } else { // NOLINT
177 row_.row_index_--;
178 }
179 return *this;
180 }
181
182 explicit operator bool() const
183 {
184 return row_.row_index_ >= 0 && helpers::ToUnsigned(row_.row_index_) < row_.table_->GetRowsCount();
185 }
186
187 bool operator==(const BitTableIterator &rhs) const
188 {
189 return row_ == rhs.row_;
190 }
191
192 bool operator!=(const BitTableIterator &rhs) const
193 {
194 return row_ != rhs.row_;
195 }
196
197 auto operator+(int32_t n) const
198 {
199 if constexpr (reverse_iteration) { // NOLINT
200 return BitTableIterator(row_.table_, row_.row_index_ - n);
201 } else { // NOLINT
202 return BitTableIterator(row_.table_, row_.row_index_ + n);
203 }
204 }
205 auto operator-(int32_t n) const
206 {
207 if constexpr (reverse_iteration) { // NOLINT
208 return BitTableIterator(row_.table_, row_.row_index_ + n);
209 } else { // NOLINT
210 return BitTableIterator(row_.table_, row_.row_index_ - n);
211 }
212 }
213 int32_t operator-(const BitTableIterator &rhs) const
214 {
215 if constexpr (reverse_iteration) { // NOLINT
216 return rhs.row_.row_index_ - row_.row_index_;
217 } else { // NOLINT
218 return row_.row_index_ - rhs.row_.row_index_;
219 }
220 }
221
222 auto &operator+=(int32_t n)
223 {
224 if constexpr (reverse_iteration) { // NOLINT
225 row_.row_index_ -= n;
226 } else { // NOLINT
227 row_.row_index_ += n;
228 }
229 return *this;
230 }
231
232 Accessor &operator*()
233 {
234 return row_;
235 }
236 Accessor *operator->()
237 {
238 return &row_;
239 }
240
241 ~BitTableIterator() = default;
242
243 DEFAULT_COPY_SEMANTIC(BitTableIterator);
244 DEFAULT_NOEXCEPT_MOVE_SEMANTIC(BitTableIterator);
245
246 private:
247 Accessor row_;
248 };
249
250 template <size_t NumColumns>
251 struct BitTableDefault : public BitTableRow<NumColumns, BitTableDefault<NumColumns>> {
252 using Base = BitTableRow<NumColumns, BitTableDefault<NumColumns>>;
253 using Base::Base;
254 };
255
256 // NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
257 #define BIT_TABLE_HEADER(columns, name) \
258 using Base = BitTableRow<columns, name>; \
259 using BitTableRow<columns, name>::BitTableRow; \
260 template <int Column, int Dummy> \
261 struct ColumnName; \
262 static constexpr const char *TABLE_NAME = #name;
263
264 // NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
265 #define BIT_TABLE_COLUMN(index, name, upname) \
266 static constexpr size_t COLUMN_##upname = index; \
267 static constexpr const char COLUMN_NAME_##upname[] = #name; /* NOLINT */ \
268 template <int Dummy> \
269 struct ColumnName<index, Dummy> { \
270 static constexpr const char VALUE[] = #name; /* NOLINT */ \
271 }; \
272 uint32_t Get##name() const \
273 { \
274 return Get(index); \
275 } \
276 bool Has##name() const \
277 { \
278 return Get(index) != NO_VALUE; \
279 }
280
281 template <typename Accessor, size_t... Columns>
282 // NOLINTNEXTLINE(readability-named-parameter)
GetBitTableColumnNamesImpl(std::index_sequence<Columns...>)283 static const char *const *GetBitTableColumnNamesImpl(std::index_sequence<Columns...>)
284 {
285 static const std::array<const char *, sizeof...(Columns)> names = {
286 Accessor::template ColumnName<Columns, 0>::VALUE...};
287 return names.data();
288 }
289
290 template <typename AccessorT>
291 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
292 class BitTable {
293 public:
294 static constexpr uint32_t NO_VALUE = -1;
295 static constexpr uint32_t NO_VALUE_DIFF = -1;
296
297 static constexpr size_t NUM_COLUMNS = AccessorT::NUM_COLUMNS;
298
299 using Accessor = AccessorT;
300 using RegionType = BitMemoryRegion<const uint8_t>;
301
302 public:
303 template <bool Reversed>
304 class Range {
305 public:
306 using RangeIterator =
307 std::conditional_t<Reversed, BitTableIterator<Accessor, true>, BitTableIterator<Accessor, false>>;
308
Range(BitTable * table,int start,int end)309 Range(BitTable *table, int start, int end) : table_(table), start_(start), end_(end) {}
begin()310 auto begin()
311 {
312 return RangeIterator(table_, start_);
313 }
end()314 auto end()
315 {
316 return RangeIterator(table_, end_);
317 }
318 Accessor operator[](size_t index)
319 {
320 return *(RangeIterator(table_, start_) + index);
321 }
322
323 private:
324 BitTable *table_;
325 int start_;
326 int end_;
327 };
328
begin()329 auto begin() const
330 {
331 return BitTableIterator<Accessor, false>(this, 0);
332 }
333
begin()334 auto begin()
335 {
336 return BitTableIterator<Accessor, false>(this, 0);
337 }
338
end()339 auto end() const
340 {
341 return BitTableIterator<Accessor, false>(this, GetRowsCount());
342 }
343
end()344 auto end()
345 {
346 return BitTableIterator<Accessor, false>(this, GetRowsCount());
347 }
348
GetRange(int start,int end)349 auto GetRange(int start, int end)
350 {
351 return Range<false>(this, start, end);
352 }
GetRangeReversed(int start,int end)353 auto GetRangeReversed(int start, int end)
354 {
355 return Range<true>(this, end - 1, start - 1);
356 }
GetRangeReversed()357 auto GetRangeReversed()
358 {
359 if (GetRowsCount() == 0) {
360 return Range<true>(this, -1, -1);
361 }
362 return Range<true>(this, GetRowsCount() - 1, -1);
363 }
364
GetColumnsCount()365 constexpr size_t GetColumnsCount() const
366 {
367 return NUM_COLUMNS;
368 }
369
GetRowsCount()370 size_t GetRowsCount() const
371 {
372 return rows_count_;
373 }
374
GetRowSizeInBits()375 size_t GetRowSizeInBits() const
376 {
377 return columns_offsets_[NUM_COLUMNS];
378 }
379
GetColumnWidth(size_t index)380 size_t GetColumnWidth(size_t index) const
381 {
382 return columns_offsets_[index + 1] - columns_offsets_[index];
383 }
384
ReadColumn(size_t row_index,size_t column)385 uint32_t ReadColumn(size_t row_index, size_t column) const
386 {
387 ASSERT(column < GetColumnsCount());
388 return region_.Read(row_index * GetRowSizeInBits() + columns_offsets_[column], GetColumnWidth(column)) +
389 NO_VALUE_DIFF;
390 }
391
GetRow(size_t index)392 auto GetRow(size_t index)
393 {
394 ASSERT(index < GetRowsCount());
395 return Accessor(this, index);
396 }
397
GetRow(size_t index)398 auto GetRow(size_t index) const
399 {
400 ASSERT(index < GetRowsCount());
401 return Accessor(this, index);
402 }
403
GetInvalidRow()404 auto GetInvalidRow() const
405 {
406 return Accessor(this, -1);
407 }
408
GetBitMemoryRegion(uint32_t row)409 RegionType GetBitMemoryRegion(uint32_t row) const
410 {
411 if (row == NO_VALUE) {
412 return RegionType();
413 }
414 size_t offset = row * GetRowSizeInBits() + columns_offsets_[0];
415 return region_.Subregion(offset, GetColumnWidth(0));
416 }
417
Decode(BitMemoryStreamIn * stream)418 void Decode(BitMemoryStreamIn *stream)
419 {
420 auto columns = VarintPack::Read<NUM_COLUMNS + 1>(stream);
421 rows_count_ = columns[NUM_COLUMNS];
422
423 // Calculate offsets
424 columns_offsets_[0] = 0;
425 for (size_t i = 0; i < NUM_COLUMNS; i++) {
426 columns_offsets_[i + 1] = columns_offsets_[i] + columns[i];
427 }
428
429 region_ = stream->ReadRegion(GetRowsCount() * GetRowSizeInBits());
430 }
431
432 void Dump(std::ostream &out = std::cout) const
433 {
434 out << "BitTable: " << Accessor::TABLE_NAME << ", rows=" << GetRowsCount()
435 << ", row_size=" << GetRowSizeInBits() << std::endl;
436 static constexpr size_t INDENT = 4;
437 std::array<uint32_t, NUM_COLUMNS> width {};
438 out << " ";
439 for (size_t i = 0; i < NUM_COLUMNS; i++) {
440 out << GetColumnName(i) << " ";
441 width[i] = strlen(GetColumnName(i)) + 1;
442 }
443 out << std::endl;
444 int index = 0;
445 for (auto row : *this) {
446 static constexpr size_t INDEX_WIDTH = 2;
447 out << std::right << std::setw(INDEX_WIDTH) << index++ << ": ";
448 for (size_t i = 0; i < NUM_COLUMNS; i++) {
449 std::string indent(INDENT, ' ');
450 out << std::left << std::setw(width[i]);
451 out << row.GetColumnStr(i);
452 }
453 out << std::endl;
454 }
455 }
456
GetColumnName(size_t index)457 const char *GetColumnName(size_t index) const
458 {
459 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
460 return GetBitTableColumnNamesImpl<Accessor>(std::make_index_sequence<NUM_COLUMNS>())[index];
461 }
462
463 private:
464 BitMemoryRegion<const uint8_t> region_;
465 std::array<uint16_t, NUM_COLUMNS + 1> columns_offsets_ {};
466 uint16_t rows_count_ {0};
467 };
468
469 template <typename Accessor>
470 class BitTableBuilder {
471 public:
472 static constexpr size_t NUM_COLUMNS = Accessor::NUM_COLUMNS;
473 static constexpr uint32_t NO_VALUE = BitTable<Accessor>::NO_VALUE;
474 static constexpr uint32_t NO_VALUE_DIFF = BitTable<Accessor>::NO_VALUE_DIFF;
475
476 class Entry {
477 public:
Entry()478 Entry()
479 {
480 std::fill(data_.begin(), data_.end(), NO_VALUE);
481 }
482
Entry(std::initializer_list<uint32_t> data)483 Entry(std::initializer_list<uint32_t> data)
484 {
485 std::copy(data.begin(), data.end(), data_.begin());
486 }
487
488 uint32_t operator[](size_t index) const
489 {
490 return data_[index];
491 }
492
493 uint32_t &operator[](size_t index)
494 {
495 return data_[index];
496 }
497
498 bool operator==(const Entry &rhs) const
499 {
500 return data_ == rhs.data_;
501 }
502
SetColumn(size_t index,uint32_t value)503 void SetColumn(size_t index, uint32_t value)
504 {
505 data_[index] = value;
506 }
507
508 template <typename... Args>
SetColumns(Args...values)509 void SetColumns(Args... values)
510 {
511 static_assert(sizeof...(Args) == NUM_COLUMNS);
512 int i = 0;
513 (SetColumn(i++, values), ...);
514 }
515
516 ~Entry() = default;
517
518 DEFAULT_COPY_SEMANTIC(Entry);
519 DEFAULT_NOEXCEPT_MOVE_SEMANTIC(Entry);
520
521 private:
522 std::array<uint32_t, NUM_COLUMNS> data_;
523 };
524
525 public:
BitTableBuilder(ArenaAllocator * allocator)526 explicit BitTableBuilder(ArenaAllocator *allocator)
527 : entries_(allocator->Adapter()), dedup_map_(allocator->Adapter())
528 {
529 }
530
GetRowsCount()531 size_t GetRowsCount() const
532 {
533 return entries_.size();
534 }
535
GetLast()536 const Entry &GetLast() const
537 {
538 return entries_.back();
539 }
540
GetEntry(size_t index)541 const Entry &GetEntry(size_t index) const
542 {
543 return entries_[index];
544 }
545
GetSize()546 size_t GetSize() const
547 {
548 return entries_.size();
549 }
550
Emplace(Entry entry)551 ALWAYS_INLINE void Emplace(Entry entry)
552 {
553 entries_.emplace_back(entry);
554 }
555
Add(Entry entry)556 ALWAYS_INLINE size_t Add(Entry entry)
557 {
558 return AddArray(Span(&entry, 1));
559 }
560
AddArray(Span<Entry> entries)561 size_t AddArray(Span<Entry> entries)
562 {
563 uint32_t hash = FnvHash(entries.template SubSpan<uint32_t>(0, entries.size() * NUM_COLUMNS));
564 auto range = dedup_map_.equal_range(hash);
565 for (auto it = range.first; it != range.second; ++it) {
566 uint32_t row = it->second;
567 if (entries.size() + row <= entries_.size() &&
568 std::equal(entries.begin(), entries.end(), entries_.begin() + row)) {
569 return row;
570 }
571 }
572 uint32_t row = GetRowsCount();
573 entries_.insert(entries_.end(), entries.begin(), entries.end());
574 dedup_map_.emplace(hash, row);
575 return row;
576 }
577
CalculateColumnsWidth(Span<uint32_t> columns_width)578 void CalculateColumnsWidth(Span<uint32_t> columns_width)
579 {
580 std::fill(columns_width.begin(), columns_width.end(), 0);
581 for (const auto &entry : entries_) {
582 for (size_t i = 0; i < NUM_COLUMNS; i++) {
583 columns_width[i] |= entry[i] - NO_VALUE_DIFF;
584 }
585 }
586 for (auto &value : columns_width) {
587 value = MinimumBitsToStore(value);
588 }
589 }
590
591 template <typename Container>
Encode(BitMemoryStreamOut<Container> & stream)592 void Encode(BitMemoryStreamOut<Container> &stream)
593 {
594 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
595 std::array<uint32_t, NUM_COLUMNS + 1> columns_width;
596 CalculateColumnsWidth(Span(columns_width.data(), NUM_COLUMNS));
597 columns_width[NUM_COLUMNS] = entries_.size();
598 VarintPack::Write(stream, columns_width);
599
600 for (const auto &entry : entries_) {
601 for (size_t i = 0; i < NUM_COLUMNS; i++) {
602 stream.Write(entry[i] - NO_VALUE_DIFF, columns_width[i]);
603 }
604 }
605 }
606
607 virtual ~BitTableBuilder() = default;
608
609 NO_COPY_SEMANTIC(BitTableBuilder);
610 NO_MOVE_SEMANTIC(BitTableBuilder);
611
612 private:
613 ArenaDeque<Entry> entries_;
614 ArenaUnorderedMultiMap<uint32_t, uint32_t> dedup_map_;
615 };
616
617 class BitmapTableBuilder {
618 public:
619 using Entry = std::pair<uint32_t *, uint32_t>;
620
BitmapTableBuilder(ArenaAllocator * allocator)621 explicit BitmapTableBuilder(ArenaAllocator *allocator)
622 : allocator_(allocator), rows_(allocator->Adapter()), dedup_map_(allocator->Adapter())
623 {
624 }
625
GetRowsCount()626 size_t GetRowsCount() const
627 {
628 return rows_.size();
629 }
630
GetEntry(size_t index)631 auto GetEntry(size_t index) const
632 {
633 return BitMemoryRegion<uint32_t>(rows_[index].first, rows_[index].second);
634 }
635
Add(BitVectorSpan vec)636 size_t Add(BitVectorSpan vec)
637 {
638 if (vec.empty()) {
639 return BitTableDefault<1>::NO_VALUE;
640 }
641 uint32_t hash = FnvHash(vec.GetContainerDataSpan());
642 auto range = dedup_map_.equal_range(hash);
643 for (auto it = range.first; it != range.second; ++it) {
644 auto &row = rows_[it->second];
645 if (BitVectorSpan(row.first, row.second) == vec) {
646 return it->second;
647 }
648 }
649 size_t vec_size_in_bytes = BitsToBytesRoundUp(vec.size());
650 size_t data_size_in_bytes = RoundUp(vec_size_in_bytes, sizeof(uint32_t));
651 auto data = allocator_->AllocArray<uint32_t>(data_size_in_bytes / sizeof(uint32_t));
652 if (data_size_in_bytes != 0) {
653 errno_t res = memcpy_s(data, vec_size_in_bytes, vec.data(), vec_size_in_bytes);
654 if (res != EOK) {
655 UNREACHABLE();
656 }
657 }
658
659 // Clear extra bytes, which are not be covered by vector's data
660 size_t extra_bytes = data_size_in_bytes - vec_size_in_bytes;
661 if (extra_bytes != 0) {
662 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic,-warnings-as-errors)
663 memset_s(reinterpret_cast<uint8_t *>(data) + vec_size_in_bytes, extra_bytes, 0, extra_bytes);
664 }
665 uint32_t index = rows_.size();
666 rows_.push_back({data, vec.size()});
667 dedup_map_.emplace(hash, index);
668 return index;
669 }
670
671 template <typename Container>
Encode(BitMemoryStreamOut<Container> & stream)672 void Encode(BitMemoryStreamOut<Container> &stream)
673 {
674 static constexpr size_t COLUMNS_SIZE = 2;
675 std::array<uint32_t, COLUMNS_SIZE> columns = {0, static_cast<uint32_t>(rows_.size())};
676 std::for_each(rows_.begin(), rows_.end(),
677 [&columns](const auto &row) { columns[0] = std::max(columns[0], row.second); });
678 VarintPack::Write(stream, columns);
679
680 for (const auto &row : rows_) {
681 stream.Write(row.first, row.second, columns[0]);
682 }
683 }
684
685 virtual ~BitmapTableBuilder() = default;
686
687 NO_COPY_SEMANTIC(BitmapTableBuilder);
688 NO_MOVE_SEMANTIC(BitmapTableBuilder);
689
690 private:
691 ArenaAllocator *allocator_ {nullptr};
692 ArenaDeque<Entry> rows_;
693 ArenaUnorderedMultiMap<uint32_t, uint32_t> dedup_map_;
694 };
695
696 } // namespace panda
697
698 #endif // LIBPANDABASE_UTILS_BIT_TABLE_H
699