/*
 * Copyright (C) 2021 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef INCLUDE_PERFETTO_EXT_BASE_FLAT_HASH_MAP_H_
#define INCLUDE_PERFETTO_EXT_BASE_FLAT_HASH_MAP_H_

#include "perfetto/base/compiler.h"
#include "perfetto/base/logging.h"
#include "perfetto/ext/base/hash.h"
#include "perfetto/ext/base/utils.h"

#include <algorithm>
#include <limits>

namespace perfetto {
namespace base {

// An open-addressing hashmap implementation.
// Pointers are not stable, neither for keys nor values.
// Has performance similar to a RobinHood hash (without the complications) and
// roughly 2x that of a std::unordered_map.
// Doc: http://go/perfetto-hashtables .
//
// When used to implement a string pool in TraceProcessor, the performance
// characteristics obtained by replaying the set of strings seen in a 4GB trace
// (226M strings, 1M unique) are the following (see flat_hash_map_benchmark.cc):
// This(Linear+AppendOnly)    879,383,676 ns    258.013M insertions/s
// This(LinearProbe):         909,206,047 ns    249.546M insertions/s
// This(QuadraticProbe):    1,083,844,388 ns    209.363M insertions/s
// std::unordered_map:      6,203,351,870 ns    36.5811M insertions/s
// tsl::robin_map:            931,403,397 ns    243.622M insertions/s
// absl::flat_hash_map:       998,013,459 ns    227.379M insertions/s
// FollyF14FastMap:         1,181,480,602 ns    192.074M insertions/s
//
// TODO(primiano): the table regresses for heavy insert+erase workloads since we
// don't clean up tombstones outside of resizes. In the limit, the entire
// table's capacity is made up of values/tombstones, so each search has to
// exhaustively scan the full capacity.
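//
// Minimal usage sketch (illustrative only, not part of the upstream header;
// assumes the default base::Hash handles integral keys):
//
//   FlatHashMap<uint64_t, std::string> map;
//   auto res = map.Insert(42, "foo");   // res = {pointer to value, inserted?}
//   PERFETTO_DCHECK(res.second);
//   std::string* val = map.Find(42);    // nullptr if the key is absent.
//   map[7] = "bar";                     // Insert-or-access, like std::map.
//   for (auto it = map.GetIterator(); it; ++it) {
//     // Use it.key() / it.value(). Mutating the map may invalidate |it|.
//   }
//   map.Erase(42);
//
// For append-only workloads (like the StringPool case above), LinearProbe plus
// AppendOnly=true is the fastest combination (Erase() is not allowed there):
//
//   FlatHashMap<uint64_t, uint32_t, base::Hash<uint64_t>, LinearProbe,
//               /*AppendOnly=*/true> index;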

// The structs below define the probing algorithm used to probe slots upon a
// collision. They are guaranteed to visit all slots as our table size is always
// a power of two (see https://en.wikipedia.org/wiki/Quadratic_probing).

// Linear probing can be faster if the hashing is well distributed and the load
// is not high. For TraceProcessor's StringPool this is the fastest. It can
// degenerate badly if the hashing doesn't spread (e.g., if pids are used
// directly as keys, with a no-op hashing function).
struct LinearProbe {
  static inline size_t Calc(size_t key_hash, size_t step, size_t capacity) {
    return (key_hash + step) & (capacity - 1);  // Linear probe
  }
};

// Generates the sequence: 0, 3, 10, 21, 36, 55, ...
// Can be a bit (~5%) slower than LinearProbe because it's less cache hot, but
// avoids degenerating badly if the hash function is bad and causes clusters.
// A good default choice unless benchmarks prove otherwise.
struct QuadraticProbe {
  static inline size_t Calc(size_t key_hash, size_t step, size_t capacity) {
    return (key_hash + 2 * step * step + step) & (capacity - 1);
  }
};
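
// Worked example (illustrative, not part of the upstream header): with
// capacity = 8 and key_hash = 5, QuadraticProbe::Calc visits
//   step:  0  1  2  3  4  5  6  7
//   slot:  5  0  7  2  1  4  3  6
// i.e. (5 + 2*step*step + step) & 7 touches each of the 8 slots exactly once,
// as guaranteed for power-of-two capacities.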

// Tends to perform in the middle between linear and quadratic.
// It's a bit more cache-effective than the QuadraticProbe but can create more
// clustering if the hash function doesn't spread well.
// Generates the sequence: 0, 1, 3, 6, 10, 15, 21, ...
struct QuadraticHalfProbe {
  static inline size_t Calc(size_t key_hash, size_t step, size_t capacity) {
    return (key_hash + (step * step + step) / 2) & (capacity - 1);
  }
};

template <typename Key,
          typename Value,
          typename Hasher = base::Hash<Key>,
          typename Probe = QuadraticProbe,
          bool AppendOnly = false>
class FlatHashMap {
 public:
  class Iterator {
   public:
    explicit Iterator(const FlatHashMap* map) : map_(map) { FindNextNonFree(); }
    ~Iterator() = default;
    Iterator(const Iterator&) = default;
    Iterator& operator=(const Iterator&) = default;
    Iterator(Iterator&&) noexcept = default;
    Iterator& operator=(Iterator&&) noexcept = default;

    Key& key() { return map_->keys_[idx_]; }
    Value& value() { return map_->values_[idx_]; }
    const Key& key() const { return map_->keys_[idx_]; }
    const Value& value() const { return map_->values_[idx_]; }

    explicit operator bool() const { return idx_ != kEnd; }
    Iterator& operator++() {
      PERFETTO_DCHECK(idx_ < map_->capacity_);
      ++idx_;
      FindNextNonFree();
      return *this;
    }

   private:
    static constexpr size_t kEnd = std::numeric_limits<size_t>::max();

    void FindNextNonFree() {
      const auto& tags = map_->tags_;
      for (; idx_ < map_->capacity_; idx_++) {
        if (tags[idx_] != kFreeSlot && (AppendOnly || tags[idx_] != kTombstone))
          return;
      }
      idx_ = kEnd;
    }

    const FlatHashMap* map_ = nullptr;
    size_t idx_ = 0;
  };  // Iterator

  static constexpr int kDefaultLoadLimitPct = 75;
  explicit FlatHashMap(size_t initial_capacity = 0,
                       int load_limit_pct = kDefaultLoadLimitPct)
      : load_limit_percent_(load_limit_pct) {
    if (initial_capacity > 0)
      Reset(initial_capacity);
  }
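
  // Worked example (illustrative, not part of the upstream header): with
  // initial_capacity = 1024 and the default 75% limit, load_limit_ is 768, so
  // the table grows and rehashes when a new key is inserted while it already
  // holds 768 entries.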

  // We are calling Clear() so that the destructors for the inserted entries are
  // called (unless they are trivial, in which case it will be a no-op).
  ~FlatHashMap() { Clear(); }

  FlatHashMap(FlatHashMap&& other) noexcept {
    tags_ = std::move(other.tags_);
    keys_ = std::move(other.keys_);
    values_ = std::move(other.values_);
    capacity_ = other.capacity_;
    size_ = other.size_;
    max_probe_length_ = other.max_probe_length_;
    load_limit_ = other.load_limit_;
    load_limit_percent_ = other.load_limit_percent_;

    new (&other) FlatHashMap();
  }

  FlatHashMap& operator=(FlatHashMap&& other) noexcept {
    this->~FlatHashMap();
    new (this) FlatHashMap(std::move(other));
    return *this;
  }

  FlatHashMap(const FlatHashMap&) = delete;
  FlatHashMap& operator=(const FlatHashMap&) = delete;

  std::pair<Value*, bool> Insert(Key key, Value value) {
    const size_t key_hash = Hasher{}(key);
    const uint8_t tag = HashToTag(key_hash);
    static constexpr size_t kSlotNotFound = std::numeric_limits<size_t>::max();

    // In practice this loop performs at most two attempts:
    // The first iteration either:
    //  - Early-returns, because the key exists already,
    //  - Finds an insertion slot and proceeds because the load is < limit.
    // The second iteration is only hit in the unlikely case of this insertion
    // bringing the table beyond the target |load_limit_| (or the edge case
    // of the HT being full, if |load_limit_percent_| = 100).
    // We cannot simply pre-grow the table before insertion, because we must
    // guarantee that calling Insert() with a key that already exists doesn't
    // invalidate iterators.
    size_t insertion_slot;
    size_t probe_len;
    for (;;) {
      PERFETTO_DCHECK((capacity_ & (capacity_ - 1)) == 0);  // Must be a pow2.
      insertion_slot = kSlotNotFound;
      // Start the iteration at the desired slot (key_hash % capacity_),
      // searching either for a free slot or a tombstone. In the worst case we
      // might end up scanning the whole array of slots. The Probe functions are
      // guaranteed to visit all the slots within |capacity_| steps. If we find
      // a free slot, we can stop the search immediately (a free slot acts as
      // an "end of chain" for entries having the same hash). If we find a
      // tombstone (a deleted slot) we remember its position, but have to keep
      // searching until we hit a free slot, to make sure we don't insert a
      // duplicate key.
      for (probe_len = 0; probe_len < capacity_;) {
        const size_t idx = Probe::Calc(key_hash, probe_len, capacity_);
        PERFETTO_DCHECK(idx < capacity_);
        const uint8_t tag_idx = tags_[idx];
        ++probe_len;
        if (tag_idx == kFreeSlot) {
          // Rationale for "insertion_slot == kSlotNotFound": if we encountered
          // a tombstone while iterating we should reuse that rather than
          // taking another slot.
          if (AppendOnly || insertion_slot == kSlotNotFound)
            insertion_slot = idx;
          break;
        }
        // We should never encounter tombstones in AppendOnly mode.
        PERFETTO_DCHECK(!(tag_idx == kTombstone && AppendOnly));
        if (!AppendOnly && tag_idx == kTombstone) {
          insertion_slot = idx;
          continue;
        }
        if (tag_idx == tag && keys_[idx] == key) {
          // The key is already in the map.
          return std::make_pair(&values_[idx], false);
        }
      }  // for (idx)

      // If we got to this point the key does not exist (otherwise we would have
      // hit the return above) and we are going to insert a new entry.
      // Before doing so, ensure we stay under the target load limit.
      if (PERFETTO_UNLIKELY(size_ >= load_limit_)) {
        MaybeGrowAndRehash(/*grow=*/true);
        continue;
      }
      PERFETTO_DCHECK(insertion_slot != kSlotNotFound);
      break;
    }  // for (attempt)

    PERFETTO_CHECK(insertion_slot < capacity_);

    // We found a free slot (or a tombstone). Proceed with the insertion.
    Value* value_idx = &values_[insertion_slot];
    new (&keys_[insertion_slot]) Key(std::move(key));
    new (value_idx) Value(std::move(value));
    tags_[insertion_slot] = tag;
    PERFETTO_DCHECK(probe_len > 0 && probe_len <= capacity_);
    max_probe_length_ = std::max(max_probe_length_, probe_len);
    size_++;

    return std::make_pair(value_idx, true);
  }

  Value* Find(const Key& key) const {
    const size_t idx = FindInternal(key);
    if (idx == kNotFound)
      return nullptr;
    return &values_[idx];
  }

  bool Erase(const Key& key) {
    if (AppendOnly)
      PERFETTO_FATAL("Erase() not supported because AppendOnly=true");
    size_t idx = FindInternal(key);
    if (idx == kNotFound)
      return false;
    EraseInternal(idx);
    return true;
  }

  void Clear() {
    // Avoid trivial heap operations on zero-capacity std::move()-d objects.
    if (PERFETTO_UNLIKELY(capacity_ == 0))
      return;

    for (size_t i = 0; i < capacity_; ++i) {
      const uint8_t tag = tags_[i];
      if (tag != kFreeSlot && tag != kTombstone)
        EraseInternal(i);
    }
    // Clear all tombstones. This is needed in particular for AppendOnly mode,
    // where Insert() never expects to encounter a tombstone.
    MaybeGrowAndRehash(/*grow=*/false);
  }

  Value& operator[](Key key) {
    auto it_and_inserted = Insert(std::move(key), Value{});
    return *it_and_inserted.first;
  }

  Iterator GetIterator() { return Iterator(this); }
  const Iterator GetIterator() const { return Iterator(this); }

  size_t size() const { return size_; }
  size_t capacity() const { return capacity_; }

  // "protected" here is only for flat_hash_map_benchmark.cc. Everything below
  // should be considered private.
 protected:
  enum ReservedTags : uint8_t { kFreeSlot = 0, kTombstone = 1 };
  static constexpr size_t kNotFound = std::numeric_limits<size_t>::max();

  size_t FindInternal(const Key& key) const {
    const size_t key_hash = Hasher{}(key);
    const uint8_t tag = HashToTag(key_hash);
    PERFETTO_DCHECK((capacity_ & (capacity_ - 1)) == 0);  // Must be a pow2.
    PERFETTO_DCHECK(max_probe_length_ <= capacity_);
    for (size_t i = 0; i < max_probe_length_; ++i) {
      const size_t idx = Probe::Calc(key_hash, i, capacity_);
      const uint8_t tag_idx = tags_[idx];

      if (tag_idx == kFreeSlot)
        return kNotFound;
      // HashToTag() never returns kTombstone, so the tag-check below cannot
      // possibly match. Also we just want to skip tombstones.
      if (tag_idx == tag && keys_[idx] == key) {
        PERFETTO_DCHECK(tag_idx > kTombstone);
        return idx;
      }
    }  // for (idx)
    return kNotFound;
  }

  void EraseInternal(size_t idx) {
    PERFETTO_DCHECK(tags_[idx] > kTombstone);
    PERFETTO_DCHECK(size_ > 0);
    tags_[idx] = kTombstone;
    keys_[idx].~Key();
    values_[idx].~Value();
    size_--;
  }

  PERFETTO_NO_INLINE void MaybeGrowAndRehash(bool grow) {
    PERFETTO_DCHECK(size_ <= capacity_);
    const size_t old_capacity = capacity_;

    // Grow quickly up to 1MB, then chill.
    const size_t old_size_bytes = old_capacity * (sizeof(Key) + sizeof(Value));
    const size_t grow_factor = old_size_bytes < (1024u * 1024u) ? 8 : 2;
    const size_t new_capacity =
        grow ? std::max(old_capacity * grow_factor, size_t(1024))
             : old_capacity;
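    // Worked example (illustrative, not part of the upstream header): assuming
    // sizeof(Key) + sizeof(Value) == 16, the capacity sequence is
    // 1024 -> 8192 -> 65536 -> 131072 -> 262144 -> ...: the x8 factor applies
    // while the old payload is below 1 MiB (65536 * 16 bytes), after which the
    // table only doubles.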

    auto old_tags(std::move(tags_));
    auto old_keys(std::move(keys_));
    auto old_values(std::move(values_));
    size_t old_size = size_;

    // This must be a CHECK (i.e. not just a DCHECK) to prevent UAF attacks on
    // 32-bit archs, where repeatedly doubling the table size could wrap around.
    PERFETTO_CHECK(new_capacity >= old_capacity);
    Reset(new_capacity);

    size_t new_size = 0;  // Recompute the size.
    for (size_t i = 0; i < old_capacity; ++i) {
      const uint8_t old_tag = old_tags[i];
      if (old_tag != kFreeSlot && old_tag != kTombstone) {
        Insert(std::move(old_keys[i]), std::move(old_values[i]));
        old_keys[i].~Key();  // Destroy the old objects.
        old_values[i].~Value();
        new_size++;
      }
    }
    PERFETTO_DCHECK(new_size == old_size);
    size_ = new_size;
  }

  // Doesn't call destructors. Use Clear() for that.
  PERFETTO_NO_INLINE void Reset(size_t n) {
    PERFETTO_DCHECK((n & (n - 1)) == 0);  // Must be a pow2.

    capacity_ = n;
    max_probe_length_ = 0;
    size_ = 0;
    load_limit_ = n * static_cast<size_t>(load_limit_percent_) / 100;
    load_limit_ = std::min(load_limit_, n);

    tags_.reset(new uint8_t[n]);
    memset(&tags_[0], 0, n);                  // Clear all tags.
    keys_ = AlignedAllocTyped<Key[]>(n);      // Deliberately not 0-initialized.
    values_ = AlignedAllocTyped<Value[]>(n);  // Deliberately not 0-initialized.
  }

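  // Worked example (illustrative, not part of the upstream header): on a
  // 64-bit build, full_hash = 0x01ABCDEF00000000 keeps only the top byte,
  // 0x01, which equals kTombstone; the correction below bumps it to 0x03.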
  static inline uint8_t HashToTag(size_t full_hash) {
    uint8_t tag = full_hash >> (sizeof(full_hash) * 8 - 8);
    // Ensure the tag is always >= 2. We use 0, 1 for kFreeSlot and kTombstone.
    tag += (tag <= kTombstone) << 1;
    PERFETTO_DCHECK(tag > kTombstone);
    return tag;
  }

  size_t capacity_ = 0;
  size_t size_ = 0;
  size_t max_probe_length_ = 0;
  size_t load_limit_ = 0;  // Updated every time |capacity_| changes.
  int load_limit_percent_ =
      kDefaultLoadLimitPct;  // Load factor limit in % of |capacity_|.

  // These arrays always have |capacity_| elements.
  // Note: AlignedUniquePtr just allocates memory, doesn't invoke any ctor/dtor.
  std::unique_ptr<uint8_t[]> tags_;
  AlignedUniquePtr<Key[]> keys_;
  AlignedUniquePtr<Value[]> values_;
};

}  // namespace base
}  // namespace perfetto

#endif  // INCLUDE_PERFETTO_EXT_BASE_FLAT_HASH_MAP_H_