/*
 * Copyright (C) 2021 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef INCLUDE_PERFETTO_EXT_BASE_FLAT_HASH_MAP_H_
#define INCLUDE_PERFETTO_EXT_BASE_FLAT_HASH_MAP_H_

#include "perfetto/base/compiler.h"
#include "perfetto/base/logging.h"
#include "perfetto/ext/base/hash.h"
#include "perfetto/ext/base/utils.h"

#include <algorithm>
#include <functional>
#include <limits>

namespace perfetto {
namespace base {

// An open-addressing hashmap implementation.
// Pointers are not stable, for either keys or values.
// Has performance similar to a RobinHood hash map (without the complications)
// and is ~2x faster than std::unordered_map.
// Doc: http://go/perfetto-hashtables .
//
// When used to implement a string pool in TraceProcessor, the performance
// characteristics obtained by replaying the set of strings seen in a 4GB trace
// (226M strings, 1M unique) are the following (see flat_hash_map_benchmark.cc):
// This(Linear+AppendOnly)    879,383,676 ns    258.013M insertions/s
// This(LinearProbe):         909,206,047 ns    249.546M insertions/s
// This(QuadraticProbe):    1,083,844,388 ns    209.363M insertions/s
// std::unordered_map:      6,203,351,870 ns    36.5811M insertions/s
// tsl::robin_map:            931,403,397 ns    243.622M insertions/s
// absl::flat_hash_map:       998,013,459 ns    227.379M insertions/s
// FollyF14FastMap:         1,181,480,602 ns    192.074M insertions/s
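//
// Illustrative usage (a minimal sketch; assumes base::Hash supports the key
// type used here):
//
//   FlatHashMap<uint64_t, size_t> counts;
//   counts.Insert(42, 1);             // {&value, true} on first insertion.
//   counts[42]++;                     // operator[] default-constructs values.
//   if (size_t* v = counts.Find(42)) { /* use *v */ }
//   counts.Erase(42);                 // Unsupported when AppendOnly=true.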

// The structs below define the probing algorithm used to probe slots upon a
// collision. They are guaranteed to visit all slots as our table size is always
// a power of two (see https://en.wikipedia.org/wiki/Quadratic_probing).
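// For example, QuadraticProbe over a capacity-8 table with key_hash 0 computes
// (0 + 2*s*s + s) & 7 for s = 0..7, yielding the slot sequence
// 0, 3, 2, 5, 4, 7, 6, 1: a permutation that covers every slot exactly once.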

// Linear probing can be faster if the hashing is well distributed and the load
// is not high. For TraceProcessor's StringPool this is the fastest. It can
// degenerate badly if the hashing doesn't spread (e.g., if using pids directly
// as keys, with a no-op hashing function).
struct LinearProbe {
  static inline size_t Calc(size_t key_hash, size_t step, size_t capacity) {
    return (key_hash + step) & (capacity - 1);  // Linear probe
  }
};

// Generates the sequence: 0, 3, 10, 21, 36, 55, ...
// Can be a bit (~5%) slower than LinearProbe because it's less cache hot, but
// avoids degenerating badly if the hash function is bad and causes clusters.
// A good default choice unless benchmarks prove otherwise.
struct QuadraticProbe {
  static inline size_t Calc(size_t key_hash, size_t step, size_t capacity) {
    return (key_hash + 2 * step * step + step) & (capacity - 1);
  }
};

// Tends to perform in the middle between linear and quadratic.
// It's a bit more cache-effective than the QuadraticProbe but can create more
// clustering if the hash function doesn't spread well.
// Generates the sequence: 0, 1, 3, 6, 10, 15, 21, ...
struct QuadraticHalfProbe {
  static inline size_t Calc(size_t key_hash, size_t step, size_t capacity) {
    return (key_hash + (step * step + step) / 2) & (capacity - 1);
  }
};
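
// Note: any custom probing strategy can be plugged in via the |Probe| template
// parameter of FlatHashMap below. It only needs a static
// Calc(key_hash, step, capacity) member returning an index in [0, capacity).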

template <typename Key,
          typename Value,
          typename Hasher = base::Hash<Key>,
          typename Probe = QuadraticProbe,
          bool AppendOnly = false>
class FlatHashMap {
 public:
  class Iterator {
   public:
    explicit Iterator(const FlatHashMap* map) : map_(map) { FindNextNonFree(); }
    ~Iterator() = default;
    Iterator(const Iterator&) = default;
    Iterator& operator=(const Iterator&) = default;
    Iterator(Iterator&&) noexcept = default;
    Iterator& operator=(Iterator&&) noexcept = default;

    Key& key() { return map_->keys_[idx_]; }
    Value& value() { return map_->values_[idx_]; }
    const Key& key() const { return map_->keys_[idx_]; }
    const Value& value() const { return map_->values_[idx_]; }

    explicit operator bool() const { return idx_ != kEnd; }
    Iterator& operator++() {
      PERFETTO_DCHECK(idx_ < map_->capacity_);
      ++idx_;
      FindNextNonFree();
      return *this;
    }

   private:
    static constexpr size_t kEnd = std::numeric_limits<size_t>::max();

    void FindNextNonFree() {
      const auto& tags = map_->tags_;
      for (; idx_ < map_->capacity_; idx_++) {
        if (tags[idx_] != kFreeSlot && (AppendOnly || tags[idx_] != kTombstone))
          return;
      }
      idx_ = kEnd;
    }

    const FlatHashMap* map_ = nullptr;
    size_t idx_ = 0;
  };  // Iterator
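
  // Illustrative iteration over all entries (a sketch, with |map| standing in
  // for a FlatHashMap instance):
  //   for (auto it = map.GetIterator(); it; ++it)
  //     Use(it.key(), it.value());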

  static constexpr int kDefaultLoadLimitPct = 75;
  explicit FlatHashMap(size_t initial_capacity = 0,
                       int load_limit_pct = kDefaultLoadLimitPct)
      : load_limit_percent_(load_limit_pct) {
    if (initial_capacity > 0)
      Reset(initial_capacity);
  }

  // We are calling Clear() so that the destructors for the inserted entries are
  // called (unless they are trivial, in which case it will be a no-op).
  ~FlatHashMap() { Clear(); }

  FlatHashMap(FlatHashMap&& other) noexcept {
    tags_ = std::move(other.tags_);
    keys_ = std::move(other.keys_);
    values_ = std::move(other.values_);
    capacity_ = other.capacity_;
    size_ = other.size_;
    max_probe_length_ = other.max_probe_length_;
    load_limit_ = other.load_limit_;
    load_limit_percent_ = other.load_limit_percent_;

    new (&other) FlatHashMap();
  }

  FlatHashMap& operator=(FlatHashMap&& other) noexcept {
    this->~FlatHashMap();
    new (this) FlatHashMap(std::move(other));
    return *this;
  }

  FlatHashMap(const FlatHashMap&) = delete;
  FlatHashMap& operator=(const FlatHashMap&) = delete;

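  // Returns {pointer to the value, true} if |key| was newly inserted, or
  // {pointer to the existing value, false} if |key| was already present.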
  std::pair<Value*, bool> Insert(Key key, Value value) {
    const size_t key_hash = Hasher{}(key);
    const uint8_t tag = HashToTag(key_hash);
    static constexpr size_t kSlotNotFound = std::numeric_limits<size_t>::max();

    // This for loop does in reality at most two attempts:
    // The first iteration either:
    //  - Early-returns, because the key exists already,
    //  - Finds an insertion slot and proceeds because the load is < limit.
    // The second iteration is only hit in the unlikely case of this insertion
    // bringing the table beyond the target |load_limit_| (or the edge case
    // of the HT being full, if |load_limit_percent_| = 100).
    // We cannot simply pre-grow the table before insertion, because we must
    // guarantee that calling Insert() with a key that already exists doesn't
    // invalidate iterators.
    size_t insertion_slot;
    size_t probe_len;
    for (;;) {
      PERFETTO_DCHECK((capacity_ & (capacity_ - 1)) == 0);  // Must be a pow2.
      insertion_slot = kSlotNotFound;
      // Start the iteration at the desired slot (key_hash % capacity_),
      // searching either for a free slot or a tombstone. In the worst case we
      // might end up scanning the whole array of slots. The Probe functions are
      // guaranteed to visit all the slots within |capacity_| steps. If we find
      // a free slot, we can stop the search immediately (a free slot acts as an
      // "end of chain" for entries having the same hash). If we find a
      // tombstone (a deleted slot) we remember its position, but have to keep
      // searching until we find a free slot, to make sure we don't insert a
      // duplicate key.
      for (probe_len = 0; probe_len < capacity_;) {
        const size_t idx = Probe::Calc(key_hash, probe_len, capacity_);
        PERFETTO_DCHECK(idx < capacity_);
        const uint8_t tag_idx = tags_[idx];
        ++probe_len;
        if (tag_idx == kFreeSlot) {
          // Rationale for "insertion_slot == kSlotNotFound": if we encountered
          // a tombstone while iterating we should reuse that rather than
          // taking another slot.
          if (AppendOnly || insertion_slot == kSlotNotFound)
            insertion_slot = idx;
          break;
        }
        // We should never encounter tombstones in AppendOnly mode.
        PERFETTO_DCHECK(!(tag_idx == kTombstone && AppendOnly));
        if (!AppendOnly && tag_idx == kTombstone) {
          insertion_slot = idx;
          continue;
        }
        if (tag_idx == tag && keys_[idx] == key) {
          // The key is already in the map.
          return std::make_pair(&values_[idx], false);
        }
      }  // for (idx)

      // If we got to this point the key does not exist (otherwise we would have
      // hit the return above) and we are going to insert a new entry.
      // Before doing so, ensure we stay under the target load limit.
      if (PERFETTO_UNLIKELY(size_ >= load_limit_)) {
        MaybeGrowAndRehash(/*grow=*/true);
        continue;
      }
      PERFETTO_DCHECK(insertion_slot != kSlotNotFound);
      break;
    }  // for (attempt)

    PERFETTO_CHECK(insertion_slot < capacity_);

    // We found a free slot (or a tombstone). Proceed with the insertion.
    Value* value_idx = &values_[insertion_slot];
    new (&keys_[insertion_slot]) Key(std::move(key));
    new (value_idx) Value(std::move(value));
    tags_[insertion_slot] = tag;
    PERFETTO_DCHECK(probe_len > 0 && probe_len <= capacity_);
    max_probe_length_ = std::max(max_probe_length_, probe_len);
    size_++;

    return std::make_pair(value_idx, true);
  }

  Value* Find(const Key& key) const {
    const size_t idx = FindInternal(key);
    if (idx == kNotFound)
      return nullptr;
    return &values_[idx];
  }

  bool Erase(const Key& key) {
    if (AppendOnly)
      PERFETTO_FATAL("Erase() not supported because AppendOnly=true");
    size_t idx = FindInternal(key);
    if (idx == kNotFound)
      return false;
    EraseInternal(idx);
    return true;
  }

  void Clear() {
    // Avoid trivial heap operations on zero-capacity std::move()-d objects.
    if (PERFETTO_UNLIKELY(capacity_ == 0))
      return;

    for (size_t i = 0; i < capacity_; ++i) {
      const uint8_t tag = tags_[i];
      if (tag != kFreeSlot && tag != kTombstone)
        EraseInternal(i);
    }
    // Clear all tombstones left behind by EraseInternal(). This is required
    // for AppendOnly mode, which assumes no tombstones ever exist.
    MaybeGrowAndRehash(/*grow=*/false);
  }

  Value& operator[](Key key) {
    auto it_and_inserted = Insert(std::move(key), Value{});
    return *it_and_inserted.first;
  }

  Iterator GetIterator() { return Iterator(this); }
  const Iterator GetIterator() const { return Iterator(this); }

  size_t size() const { return size_; }
  size_t capacity() const { return capacity_; }

  // "protected" here is only for flat_hash_map_benchmark.cc. Everything below
  // is for all intents and purposes private.
 protected:
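  // Tag semantics: 0 marks a never-used slot, 1 marks a deleted slot
  // (tombstone). Occupied slots store a tag derived from the key hash, always
  // >= 2 (see HashToTag()), so lookups reject most slots without comparing
  // keys.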
  enum ReservedTags : uint8_t { kFreeSlot = 0, kTombstone = 1 };
  static constexpr size_t kNotFound = std::numeric_limits<size_t>::max();

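  // Probes at most |max_probe_length_| slots (the longest probe sequence any
  // insertion has needed so far) rather than the full |capacity_|.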
  size_t FindInternal(const Key& key) const {
    const size_t key_hash = Hasher{}(key);
    const uint8_t tag = HashToTag(key_hash);
    PERFETTO_DCHECK((capacity_ & (capacity_ - 1)) == 0);  // Must be a pow2.
    PERFETTO_DCHECK(max_probe_length_ <= capacity_);
    for (size_t i = 0; i < max_probe_length_; ++i) {
      const size_t idx = Probe::Calc(key_hash, i, capacity_);
      const uint8_t tag_idx = tags_[idx];

      if (tag_idx == kFreeSlot)
        return kNotFound;
      // HashToTag() never returns kTombstone, so the tag check below can never
      // match a tombstone; deleted slots are implicitly skipped.
      if (tag_idx == tag && keys_[idx] == key) {
        PERFETTO_DCHECK(tag_idx > kTombstone);
        return idx;
      }
    }  // for (idx)
    return kNotFound;
  }

  void EraseInternal(size_t idx) {
    PERFETTO_DCHECK(tags_[idx] > kTombstone);
    PERFETTO_DCHECK(size_ > 0);
    tags_[idx] = kTombstone;
    keys_[idx].~Key();
    values_[idx].~Value();
    size_--;
  }

  PERFETTO_NO_INLINE void MaybeGrowAndRehash(bool grow) {
    PERFETTO_DCHECK(size_ <= capacity_);
    const size_t old_capacity = capacity_;

    // Grow quickly up to 1MB, then chill.
    const size_t old_size_bytes = old_capacity * (sizeof(Key) + sizeof(Value));
    const size_t grow_factor = old_size_bytes < (1024u * 1024u) ? 8 : 2;
    const size_t new_capacity =
        grow ? std::max(old_capacity * grow_factor, size_t(1024))
             : old_capacity;
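    // Worked example (assuming 16 bytes of Key + Value per entry): capacity
    // grows as 1024, 8192, 65536 (= 1 MiB of entries), then doubles onwards.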

    auto old_tags(std::move(tags_));
    auto old_keys(std::move(keys_));
    auto old_values(std::move(values_));
    size_t old_size = size_;

    // This must be a CHECK (i.e. not just a DCHECK) to prevent UAF attacks on
    // 32-bit archs that try to double the size of the table until wrapping.
    PERFETTO_CHECK(new_capacity >= old_capacity);
    Reset(new_capacity);

    size_t new_size = 0;  // Recompute the size.
    for (size_t i = 0; i < old_capacity; ++i) {
      const uint8_t old_tag = old_tags[i];
      if (old_tag != kFreeSlot && old_tag != kTombstone) {
        Insert(std::move(old_keys[i]), std::move(old_values[i]));
        old_keys[i].~Key();  // Destroy the old objects.
        old_values[i].~Value();
        new_size++;
      }
    }
    PERFETTO_DCHECK(new_size == old_size);
    size_ = new_size;
  }

  // Doesn't call destructors. Use Clear() for that.
  PERFETTO_NO_INLINE void Reset(size_t n) {
    PERFETTO_DCHECK((n & (n - 1)) == 0);  // Must be a pow2.

    capacity_ = n;
    max_probe_length_ = 0;
    size_ = 0;
    load_limit_ = n * static_cast<size_t>(load_limit_percent_) / 100;
    load_limit_ = std::min(load_limit_, n);

    tags_.reset(new uint8_t[n]);
    memset(&tags_[0], 0, n);  // Mark all slots as free (0 == kFreeSlot).
    keys_ = AlignedAllocTyped<Key[]>(n);      // Deliberately not 0-initialized.
    values_ = AlignedAllocTyped<Value[]>(n);  // Deliberately not 0-initialized.
  }

  static inline uint8_t HashToTag(size_t full_hash) {
    uint8_t tag = full_hash >> (sizeof(full_hash) * 8 - 8);
    // Ensure the tag is always >= 2: 0 and 1 are reserved for kFreeSlot and
    // kTombstone, so remap 0 -> 2 and 1 -> 3 (tags >= 2 are left unchanged).
    tag += (tag <= kTombstone) << 1;
    PERFETTO_DCHECK(tag > kTombstone);
    return tag;
  }

  size_t capacity_ = 0;
  size_t size_ = 0;
  size_t max_probe_length_ = 0;
  size_t load_limit_ = 0;  // Updated every time |capacity_| changes.
  int load_limit_percent_ =
      kDefaultLoadLimitPct;  // Load factor limit in % of |capacity_|.

  // These arrays always have |capacity_| elements.
  // Note: AlignedUniquePtr just allocates memory, doesn't invoke any ctor/dtor.
  std::unique_ptr<uint8_t[]> tags_;
  AlignedUniquePtr<Key[]> keys_;
  AlignedUniquePtr<Value[]> values_;
};

}  // namespace base
}  // namespace perfetto

#endif  // INCLUDE_PERFETTO_EXT_BASE_FLAT_HASH_MAP_H_