/*
 * Copyright (C) 2021 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef INCLUDE_PERFETTO_EXT_BASE_FLAT_HASH_MAP_H_
#define INCLUDE_PERFETTO_EXT_BASE_FLAT_HASH_MAP_H_

#include "perfetto/base/compiler.h"
#include "perfetto/base/logging.h"
#include "perfetto/ext/base/utils.h"

#include <algorithm>
#include <cstdint>
#include <cstring>
#include <functional>
#include <limits>
#include <memory>
#include <utility>

namespace perfetto {
namespace base {

// An open-addressing hashmap implementation.
// Pointers are not stable, neither for keys nor values.
// Has performance similar to a Robin Hood hash (without the complications)
// and roughly 2x that of a std::unordered_map.
// Doc: http://go/perfetto-hashtables .
//
// When used to implement a string pool in TraceProcessor, the performance
// characteristics obtained by replaying the set of strings seen in a 4GB trace
// (226M strings, 1M unique) are the following (see flat_hash_map_benchmark.cc):
// This(Linear+AppendOnly)    879,383,676 ns    258.013M insertions/s
// This(LinearProbe):         909,206,047 ns    249.546M insertions/s
// This(QuadraticProbe):    1,083,844,388 ns    209.363M insertions/s
// std::unordered_map:      6,203,351,870 ns    36.5811M insertions/s
// tsl::robin_map:            931,403,397 ns    243.622M insertions/s
// absl::flat_hash_map:       998,013,459 ns    227.379M insertions/s
// FollyF14FastMap:         1,181,480,602 ns    192.074M insertions/s
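//
// Minimal usage sketch (illustrative only; the map name, the key/value types
// and UseKeyValue() below are made-up examples, not mandated by this header):
//
//   FlatHashMap<uint64_t, size_t> counts;
//   auto res = counts.Insert(42, 1);    // Returns {Value*, inserted}.
//   if (!res.second)
//     (*res.first)++;                   // Key 42 was already present.
//   size_t* v = counts.Find(42);        // nullptr if the key is absent.
//   counts.Erase(42);                   // Not allowed when AppendOnly=true.
//   for (auto it = counts.GetIterator(); it; ++it)
//     UseKeyValue(it.key(), it.value());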

// The structs below define the probing algorithm used to probe slots upon a
// collision. They are guaranteed to visit all slots, as our table size is
// always a power of two (see https://en.wikipedia.org/wiki/Quadratic_probing).

// Linear probing can be faster if the hashing is well distributed and the load
// is not high. For TraceProcessor's StringPool this is the fastest. It can
// degenerate badly if the hashing doesn't spread (e.g., if using pids directly
// as keys, with a no-op hashing function).
struct LinearProbe {
  static inline size_t Calc(size_t key_hash, size_t step, size_t capacity) {
    return (key_hash + step) & (capacity - 1);  // Linear probe
  }
};

// Generates the sequence: 0, 3, 10, 21, 36, 55, ...
// Can be a bit (~5%) slower than LinearProbe because it's less cache hot, but
// avoids degenerating badly if the hash function is bad and causes clusters.
// A good default choice unless benchmarks prove otherwise.
struct QuadraticProbe {
  static inline size_t Calc(size_t key_hash, size_t step, size_t capacity) {
    return (key_hash + 2 * step * step + step) & (capacity - 1);
  }
};

// Tends to perform in the middle between linear and quadratic.
// It's a bit more cache-effective than the QuadraticProbe but can create more
// clustering if the hash function doesn't spread well.
// Generates the sequence: 0, 1, 3, 6, 10, 15, 21, ...
struct QuadraticHalfProbe {
  static inline size_t Calc(size_t key_hash, size_t step, size_t capacity) {
    return (key_hash + (step * step + step) / 2) & (capacity - 1);
  }
};
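
// Worked example (added illustration, not part of the original comments): with
// capacity = 8 and key_hash = 5, QuadraticProbe::Calc() returns
// (5 + 2*s*s + s) & 7 for step s = 0, 1, 2, ..., i.e. slots 5, 0, 7, 2, 1, 4,
// 3, 6: every slot exactly once, as the power-of-two table size guarantees.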

template <typename Key,
          typename Value,
          typename Hasher = std::hash<Key>,
          typename Probe = QuadraticProbe,
          bool AppendOnly = false>
class FlatHashMap {
 public:
  class Iterator {
   public:
    explicit Iterator(const FlatHashMap* map) : map_(map) { FindNextNonFree(); }
    ~Iterator() = default;
    Iterator(const Iterator&) = default;
    Iterator& operator=(const Iterator&) = default;
    Iterator(Iterator&&) noexcept = default;
    Iterator& operator=(Iterator&&) noexcept = default;

    Key& key() { return map_->keys_[idx_]; }
    Value& value() { return map_->values_[idx_]; }
    const Key& key() const { return map_->keys_[idx_]; }
    const Value& value() const { return map_->values_[idx_]; }

    explicit operator bool() const { return idx_ != kEnd; }
    Iterator& operator++() {
      PERFETTO_DCHECK(idx_ < map_->capacity_);
      ++idx_;
      FindNextNonFree();
      return *this;
    }

   private:
    static constexpr size_t kEnd = std::numeric_limits<size_t>::max();

    void FindNextNonFree() {
      const auto& tags = map_->tags_;
      for (; idx_ < map_->capacity_; idx_++) {
        if (tags[idx_] != kFreeSlot && (AppendOnly || tags[idx_] != kTombstone))
          return;
      }
      idx_ = kEnd;
    }

    const FlatHashMap* map_ = nullptr;
    size_t idx_ = 0;
  };  // Iterator

  static constexpr int kDefaultLoadLimitPct = 75;
  explicit FlatHashMap(size_t initial_capacity = 0,
                       int load_limit_pct = kDefaultLoadLimitPct)
      : load_limit_percent_(load_limit_pct) {
    if (initial_capacity > 0)
      Reset(initial_capacity);
  }

  // We call Clear() so that the destructors for the inserted entries are
  // called (unless they are trivial, in which case it is a no-op).
  ~FlatHashMap() { Clear(); }

  FlatHashMap(FlatHashMap&& other) noexcept {
    tags_ = std::move(other.tags_);
    keys_ = std::move(other.keys_);
    values_ = std::move(other.values_);
    capacity_ = other.capacity_;
    size_ = other.size_;
    max_probe_length_ = other.max_probe_length_;
    load_limit_ = other.load_limit_;
    load_limit_percent_ = other.load_limit_percent_;

    new (&other) FlatHashMap();
  }

  FlatHashMap& operator=(FlatHashMap&& other) noexcept {
    this->~FlatHashMap();
    new (this) FlatHashMap(std::move(other));
    return *this;
  }

  FlatHashMap(const FlatHashMap&) = delete;
  FlatHashMap& operator=(const FlatHashMap&) = delete;

  std::pair<Value*, bool> Insert(Key key, Value value) {
    const size_t key_hash = Hasher{}(key);
    const uint8_t tag = HashToTag(key_hash);
    static constexpr size_t kSlotNotFound = std::numeric_limits<size_t>::max();

    // This for loop in reality does at most two attempts:
    // The first iteration either:
    //  - Early-returns, because the key exists already, or
    //  - Finds an insertion slot and proceeds, because the load is < limit.
    // The second iteration is only hit in the unlikely case of this insertion
    // bringing the table beyond the target |load_limit_| (or the edge case
    // of the HT being full, if |load_limit_percent_| = 100).
    // We cannot simply pre-grow the table before insertion, because we must
    // guarantee that calling Insert() with a key that already exists doesn't
    // invalidate iterators.
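    //
    // Illustrative scenario (added note): if |size_| has already reached
    // |load_limit_| and |key| is new, the first iteration falls through to
    // MaybeGrowAndRehash() and the second iteration re-probes the grown table.
    // If |key| already exists, the first iteration returns the existing slot
    // and no rehash (and hence no iterator invalidation) takes place.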
    size_t insertion_slot;
    size_t probe_len;
    for (;;) {
      PERFETTO_DCHECK((capacity_ & (capacity_ - 1)) == 0);  // Must be a pow2.
      insertion_slot = kSlotNotFound;
      // Start the iteration at the desired slot (key_hash % capacity_),
      // searching either for a free slot or a tombstone. In the worst case we
      // might end up scanning the whole array of slots. The Probe functions are
      // guaranteed to visit all the slots within |capacity_| steps. If we find
      // a free slot, we can stop the search immediately (a free slot acts as an
      // "end of chain" for entries having the same hash). If we find a
      // tombstone (a deleted slot) we remember its position, but we have to
      // keep searching until we hit a free slot, to make sure we don't insert a
      // duplicate key.
      for (probe_len = 0; probe_len < capacity_;) {
        const size_t idx = Probe::Calc(key_hash, probe_len, capacity_);
        PERFETTO_DCHECK(idx < capacity_);
        const uint8_t tag_idx = tags_[idx];
        ++probe_len;
        if (tag_idx == kFreeSlot) {
          // Rationale for "insertion_slot == kSlotNotFound": if we encountered
          // a tombstone while iterating we should reuse that rather than
          // taking another slot.
          if (AppendOnly || insertion_slot == kSlotNotFound)
            insertion_slot = idx;
          break;
        }
        // We should never encounter tombstones in AppendOnly mode.
        PERFETTO_DCHECK(!(tag_idx == kTombstone && AppendOnly));
        if (!AppendOnly && tag_idx == kTombstone) {
          insertion_slot = idx;
          continue;
        }
        if (tag_idx == tag && keys_[idx] == key) {
          // The key is already in the map.
          return std::make_pair(&values_[idx], false);
        }
      }  // for (idx)

      // If we got to this point the key does not exist (otherwise we would have
      // hit the return above) and we are going to insert a new entry.
      // Before doing so, ensure we stay under the target load limit.
      if (PERFETTO_UNLIKELY(size_ >= load_limit_)) {
        MaybeGrowAndRehash(/*grow=*/true);
        continue;
      }
      PERFETTO_DCHECK(insertion_slot != kSlotNotFound);
      break;
    }  // for (attempt)

    PERFETTO_CHECK(insertion_slot < capacity_);

    // We found a free slot (or a tombstone). Proceed with the insertion.
    Value* value_idx = &values_[insertion_slot];
    new (&keys_[insertion_slot]) Key(std::move(key));
    new (value_idx) Value(std::move(value));
    tags_[insertion_slot] = tag;
    PERFETTO_DCHECK(probe_len > 0 && probe_len <= capacity_);
    max_probe_length_ = std::max(max_probe_length_, probe_len);
    size_++;

    return std::make_pair(value_idx, true);
  }

  Value* Find(const Key& key) const {
    const size_t idx = FindInternal(key);
    if (idx == kNotFound)
      return nullptr;
    return &values_[idx];
  }

  bool Erase(const Key& key) {
    if (AppendOnly)
      PERFETTO_FATAL("Erase() not supported because AppendOnly=true");
    size_t idx = FindInternal(key);
    if (idx == kNotFound)
      return false;
    EraseInternal(idx);
    return true;
  }

  void Clear() {
    // Avoid trivial heap operations on zero-capacity std::move()-d objects.
    if (PERFETTO_UNLIKELY(capacity_ == 0))
      return;

    for (size_t i = 0; i < capacity_; ++i) {
      const uint8_t tag = tags_[i];
      if (tag != kFreeSlot && tag != kTombstone)
        EraseInternal(i);
    }
    // Clear all tombstones. We really need to do this for AppendOnly.
    MaybeGrowAndRehash(/*grow=*/false);
  }

  Value& operator[](Key key) {
    auto it_and_inserted = Insert(std::move(key), Value{});
    return *it_and_inserted.first;
  }

  Iterator GetIterator() { return Iterator(this); }
  const Iterator GetIterator() const { return Iterator(this); }

  size_t size() const { return size_; }
  size_t capacity() const { return capacity_; }

  // "protected" here is only for flat_hash_map_benchmark.cc. Everything below
  // is, for all practical purposes, private.
 protected:
  enum ReservedTags : uint8_t { kFreeSlot = 0, kTombstone = 1 };
  static constexpr size_t kNotFound = std::numeric_limits<size_t>::max();

  size_t FindInternal(const Key& key) const {
    const size_t key_hash = Hasher{}(key);
    const uint8_t tag = HashToTag(key_hash);
    PERFETTO_DCHECK((capacity_ & (capacity_ - 1)) == 0);  // Must be a pow2.
    PERFETTO_DCHECK(max_probe_length_ <= capacity_);
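    // Note (added explanation): the scan below can stop after
    // |max_probe_length_| steps because every key currently in the map was
    // inserted within that many probe steps, so probing further cannot find a
    // matching entry.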
    for (size_t i = 0; i < max_probe_length_; ++i) {
      const size_t idx = Probe::Calc(key_hash, i, capacity_);
      const uint8_t tag_idx = tags_[idx];

      if (tag_idx == kFreeSlot)
        return kNotFound;
      // HashToTag() never returns kFreeSlot or kTombstone, so the tag check
      // below can never match a tombstone; deleted slots are simply skipped.
      if (tag_idx == tag && keys_[idx] == key) {
        PERFETTO_DCHECK(tag_idx > kTombstone);
        return idx;
      }
    }  // for (idx)
    return kNotFound;
  }

  void EraseInternal(size_t idx) {
    PERFETTO_DCHECK(tags_[idx] > kTombstone);
    PERFETTO_DCHECK(size_ > 0);
    tags_[idx] = kTombstone;
    keys_[idx].~Key();
    values_[idx].~Value();
    size_--;
  }

  PERFETTO_NO_INLINE void MaybeGrowAndRehash(bool grow) {
    PERFETTO_DCHECK(size_ <= capacity_);
    const size_t old_capacity = capacity_;

    // Grow quickly up to 1MB, then chill.
    const size_t old_size_bytes = old_capacity * (sizeof(Key) + sizeof(Value));
    const size_t grow_factor = old_size_bytes < (1024u * 1024u) ? 8 : 2;
    const size_t new_capacity =
        grow ? std::max(old_capacity * grow_factor, size_t(1024))
             : old_capacity;
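    // Illustrative numbers (added example, assuming 16 bytes per entry, e.g.
    // Key = Value = uint64_t): capacity grows 1024 -> 8192 -> 65536; at that
    // point the key/value storage reaches 1MB and later grows only double
    // (131072, 262144, ...).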

    auto old_tags(std::move(tags_));
    auto old_keys(std::move(keys_));
    auto old_values(std::move(values_));
    size_t old_size = size_;

    // This must be a CHECK (i.e. not just a DCHECK) to prevent UAF attacks on
    // 32-bit archs that try to double the size of the table until wrapping.
    PERFETTO_CHECK(new_capacity >= old_capacity);
    Reset(new_capacity);

    size_t new_size = 0;  // Recompute the size.
    for (size_t i = 0; i < old_capacity; ++i) {
      const uint8_t old_tag = old_tags[i];
      if (old_tag != kFreeSlot && old_tag != kTombstone) {
        Insert(std::move(old_keys[i]), std::move(old_values[i]));
        old_keys[i].~Key();  // Destroy the old objects.
        old_values[i].~Value();
        new_size++;
      }
    }
    PERFETTO_DCHECK(new_size == old_size);
    size_ = new_size;
  }

  // Doesn't call destructors. Use Clear() for that.
  PERFETTO_NO_INLINE void Reset(size_t n) {
    PERFETTO_DCHECK((n & (n - 1)) == 0);  // Must be a pow2.

    capacity_ = n;
    max_probe_length_ = 0;
    size_ = 0;
    load_limit_ = n * static_cast<size_t>(load_limit_percent_) / 100;
    load_limit_ = std::min(load_limit_, n);

    tags_.reset(new uint8_t[n]);
    memset(&tags_[0], 0, n);                  // Clear all tags.
    keys_ = AlignedAllocTyped<Key[]>(n);      // Deliberately not 0-initialized.
    values_ = AlignedAllocTyped<Value[]>(n);  // Deliberately not 0-initialized.
  }

  static inline uint8_t HashToTag(size_t full_hash) {
    uint8_t tag = full_hash >> (sizeof(full_hash) * 8 - 8);
    // Ensure the tag is always >= 2. We use 0 and 1 for kFreeSlot and
    // kTombstone.
    tag += (tag <= kTombstone) << 1;
    PERFETTO_DCHECK(tag > kTombstone);
    return tag;
  }
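
  // Added illustration: on a 64-bit build, a full_hash of 0xAB00000000001234
  // yields tag 0xAB. A top byte of 0x00 or 0x01 would collide with the
  // reserved kFreeSlot/kTombstone values and is remapped to 0x02 or 0x03.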

  size_t capacity_ = 0;
  size_t size_ = 0;
  size_t max_probe_length_ = 0;
  size_t load_limit_ = 0;  // Updated every time |capacity_| changes.
  int load_limit_percent_ =
      kDefaultLoadLimitPct;  // Load factor limit in % of |capacity_|.

  // These arrays always have |capacity_| elements.
  // Note: AlignedUniquePtr just allocates memory, doesn't invoke any ctor/dtor.
  std::unique_ptr<uint8_t[]> tags_;
  AlignedUniquePtr<Key[]> keys_;
  AlignedUniquePtr<Value[]> values_;
};

}  // namespace base
}  // namespace perfetto

#endif  // INCLUDE_PERFETTO_EXT_BASE_FLAT_HASH_MAP_H_