/*
 * Copyright (C) 2021 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef INCLUDE_PERFETTO_EXT_BASE_FLAT_HASH_MAP_H_
#define INCLUDE_PERFETTO_EXT_BASE_FLAT_HASH_MAP_H_

#include "perfetto/base/compiler.h"
#include "perfetto/base/logging.h"
#include "perfetto/ext/base/utils.h"

#include <algorithm>
#include <functional>
#include <limits>

namespace perfetto {
namespace base {

// An open-addressing hashmap implementation.
// Pointers are not stable, neither for keys nor values.
// Has performance similar to a Robin Hood hash map (without the complications)
// and is at least 2x faster than std::unordered_map.
// Doc: http://go/perfetto-hashtables .
//
// When used to implement a string pool in TraceProcessor, the performance
// characteristics obtained by replaying the set of strings seen in a 4GB trace
// (226M strings, 1M unique) are the following (see flat_hash_map_benchmark.cc):
// This(Linear+AppendOnly)      879,383,676 ns    258.013M insertions/s
// This(LinearProbe):           909,206,047 ns    249.546M insertions/s
// This(QuadraticProbe):      1,083,844,388 ns    209.363M insertions/s
// std::unordered_map:        6,203,351,870 ns    36.5811M insertions/s
// tsl::robin_map:              931,403,397 ns    243.622M insertions/s
// absl::flat_hash_map:         998,013,459 ns    227.379M insertions/s
// FollyF14FastMap:           1,181,480,602 ns    192.074M insertions/s

// The structs below define the probing algorithm used to probe slots upon a
// collision. They are guaranteed to visit all slots as our table size is
// always a power of two (see https://en.wikipedia.org/wiki/Quadratic_probing).

// Linear probing can be faster if the hashing is well distributed and the load
// is not high. For TraceProcessor's StringPool this is the fastest. It can
// degenerate badly if the hashing doesn't spread the keys (e.g., when using
// pids directly as keys, with a no-op hashing function).
struct LinearProbe {
  static inline size_t Calc(size_t key_hash, size_t step, size_t capacity) {
    return (key_hash + step) & (capacity - 1);  // Linear probe
  }
};

// Generates the sequence: 0, 3, 10, 21, 36, 55, ...
// Can be a bit (~5%) slower than LinearProbe because it's less cache hot, but
// avoids degenerating badly if the hash function is bad and causes clusters.
// A good default choice unless benchmarks prove otherwise.
struct QuadraticProbe {
  static inline size_t Calc(size_t key_hash, size_t step, size_t capacity) {
    return (key_hash + 2 * step * step + step) & (capacity - 1);
  }
};
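
// For illustration, a worked example of the guarantee above, assuming
// key_hash = 13 and capacity = 8 (so the mask is 7). QuadraticProbe visits:
//   step:    0   1   2   3   4   5   6   7
//   offset:  0   3  10  21  36  55  78  105   // 2 * step^2 + step
//   index:   5   0   7   2   1   4   3   6    // (13 + offset) & 7
// Every slot is visited exactly once within |capacity| steps, which is what
// guarantees termination of the probe loops in FlatHashMap below.
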
// Tends to perform in the middle between linear and quadratic.
// It's a bit more cache-effective than the QuadraticProbe but can create more
// clustering if the hash function doesn't spread well.
// Generates the sequence: 0, 1, 3, 6, 10, 15, 21, ...
struct QuadraticHalfProbe {
  static inline size_t Calc(size_t key_hash, size_t step, size_t capacity) {
    return (key_hash + (step * step + step) / 2) & (capacity - 1);
  }
};

template <typename Key,
          typename Value,
          typename Hasher = std::hash<Key>,
          typename Probe = QuadraticProbe,
          bool AppendOnly = false>
class FlatHashMap {
 public:
  class Iterator {
   public:
    explicit Iterator(const FlatHashMap* map) : map_(map) { FindNextNonFree(); }
    ~Iterator() = default;
    Iterator(const Iterator&) = default;
    Iterator& operator=(const Iterator&) = default;
    Iterator(Iterator&&) noexcept = default;
    Iterator& operator=(Iterator&&) noexcept = default;

    Key& key() { return map_->keys_[idx_]; }
    Value& value() { return map_->values_[idx_]; }
    const Key& key() const { return map_->keys_[idx_]; }
    const Value& value() const { return map_->values_[idx_]; }

    explicit operator bool() const { return idx_ != kEnd; }
    Iterator& operator++() {
      PERFETTO_DCHECK(idx_ < map_->capacity_);
      ++idx_;
      FindNextNonFree();
      return *this;
    }

   private:
    static constexpr size_t kEnd = std::numeric_limits<size_t>::max();

    void FindNextNonFree() {
      const auto& tags = map_->tags_;
      for (; idx_ < map_->capacity_; idx_++) {
        if (tags[idx_] != kFreeSlot && (AppendOnly || tags[idx_] != kTombstone))
          return;
      }
      idx_ = kEnd;
    }

    const FlatHashMap* map_ = nullptr;
    size_t idx_ = 0;
  };  // Iterator

  static constexpr int kDefaultLoadLimitPct = 75;
  explicit FlatHashMap(size_t initial_capacity = 0,
                       int load_limit_pct = kDefaultLoadLimitPct)
      : load_limit_percent_(load_limit_pct) {
    if (initial_capacity > 0)
      Reset(initial_capacity);
  }

  // We are calling Clear() so that the destructors for the inserted entries
  // are called (unless they are trivial, in which case it will be a no-op).
  ~FlatHashMap() { Clear(); }

  FlatHashMap(FlatHashMap&& other) noexcept {
    tags_ = std::move(other.tags_);
    keys_ = std::move(other.keys_);
    values_ = std::move(other.values_);
    capacity_ = other.capacity_;
    size_ = other.size_;
    max_probe_length_ = other.max_probe_length_;
    load_limit_ = other.load_limit_;
    load_limit_percent_ = other.load_limit_percent_;

    new (&other) FlatHashMap();
  }

  FlatHashMap& operator=(FlatHashMap&& other) noexcept {
    this->~FlatHashMap();
    new (this) FlatHashMap(std::move(other));
    return *this;
  }

  FlatHashMap(const FlatHashMap&) = delete;
  FlatHashMap& operator=(const FlatHashMap&) = delete;
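
  // A minimal usage sketch of the API below. The map name, keys and
  // DoSomething() are hypothetical:
  //
  //   base::FlatHashMap<std::string, int> counts;
  //   auto res = counts.Insert("foo", 1);  // res == {Value*, inserted}.
  //   int* val = counts.Find("foo");       // nullptr if the key is absent.
  //   counts["bar"]++;                     // Default-constructs the value.
  //   counts.Erase("foo");                 // PERFETTO_FATAL if AppendOnly.
  //   for (auto it = counts.GetIterator(); it; ++it)
  //     DoSomething(it.key(), it.value());
  //
  // Keep in mind that, as per the header comment, pointers returned by
  // Insert()/Find() are not stable and can be invalidated by later insertions.
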
  std::pair<Value*, bool> Insert(Key key, Value value) {
    const size_t key_hash = Hasher{}(key);
    const uint8_t tag = HashToTag(key_hash);
    static constexpr size_t kSlotNotFound = std::numeric_limits<size_t>::max();

    // In practice this loop performs at most two attempts:
    // The first iteration either:
    // - Early-returns, because the key exists already,
    // - Finds an insertion slot and proceeds, because the load is < limit.
    // The second iteration is only hit in the unlikely case of this insertion
    // bringing the table beyond the target |load_limit_| (or the edge case
    // of the HT being full, if |load_limit_percent_| = 100).
    // We cannot simply pre-grow the table before insertion, because we must
    // guarantee that calling Insert() with a key that already exists doesn't
    // invalidate iterators.
    size_t insertion_slot;
    size_t probe_len;
    for (;;) {
      PERFETTO_DCHECK((capacity_ & (capacity_ - 1)) == 0);  // Must be a pow2.
      insertion_slot = kSlotNotFound;
      // Start the iteration at the desired slot (key_hash % capacity_),
      // searching either for a free slot or a tombstone. In the worst case we
      // might end up scanning the whole array of slots. The Probe functions
      // are guaranteed to visit all the slots within |capacity_| steps. If we
      // find a free slot, we can stop the search immediately (a free slot acts
      // as an "end of chain" marker for entries having the same hash). If we
      // find a tombstone (a deleted slot) we remember its position, but we
      // have to keep searching until we hit a free slot, to make sure we
      // don't insert a duplicate key.
      for (probe_len = 0; probe_len < capacity_;) {
        const size_t idx = Probe::Calc(key_hash, probe_len, capacity_);
        PERFETTO_DCHECK(idx < capacity_);
        const uint8_t tag_idx = tags_[idx];
        ++probe_len;
        if (tag_idx == kFreeSlot) {
          // Rationale for "insertion_slot == kSlotNotFound": if we encountered
          // a tombstone while iterating we should reuse it rather than taking
          // another slot.
          if (AppendOnly || insertion_slot == kSlotNotFound)
            insertion_slot = idx;
          break;
        }
        // We should never encounter tombstones in AppendOnly mode.
        PERFETTO_DCHECK(!(tag_idx == kTombstone && AppendOnly));
        if (!AppendOnly && tag_idx == kTombstone) {
          insertion_slot = idx;
          continue;
        }
        if (tag_idx == tag && keys_[idx] == key) {
          // The key is already in the map.
          return std::make_pair(&values_[idx], false);
        }
      }  // for (idx)

      // If we got to this point the key does not exist (otherwise we would
      // have hit the return above) and we are going to insert a new entry.
      // Before doing so, ensure we stay under the target load limit.
      if (PERFETTO_UNLIKELY(size_ >= load_limit_)) {
        MaybeGrowAndRehash(/*grow=*/true);
        continue;
      }
      PERFETTO_DCHECK(insertion_slot != kSlotNotFound);
      break;
    }  // for (attempt)

    PERFETTO_CHECK(insertion_slot < capacity_);

    // We found a free slot (or a tombstone). Proceed with the insertion.
    Value* value_idx = &values_[insertion_slot];
    new (&keys_[insertion_slot]) Key(std::move(key));
    new (value_idx) Value(std::move(value));
    tags_[insertion_slot] = tag;
    PERFETTO_DCHECK(probe_len > 0 && probe_len <= capacity_);
    max_probe_length_ = std::max(max_probe_length_, probe_len);
    size_++;

    return std::make_pair(value_idx, true);
  }

  Value* Find(const Key& key) const {
    const size_t idx = FindInternal(key);
    if (idx == kNotFound)
      return nullptr;
    return &values_[idx];
  }

  bool Erase(const Key& key) {
    if (AppendOnly)
      PERFETTO_FATAL("Erase() not supported because AppendOnly=true");
    size_t idx = FindInternal(key);
    if (idx == kNotFound)
      return false;
    EraseInternal(idx);
    return true;
  }
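
  // A hypothetical sequence illustrating the tombstone mechanics of the
  // methods above (it assumes "a" and "b" collide on the same probe chain):
  //
  //   map.Insert("a", 1);  // Occupies a slot S0 of the chain.
  //   map.Insert("b", 2);  // Collides with S0, takes the next slot S1.
  //   map.Erase("a");      // Marks S0 as kTombstone (not kFreeSlot), so that
  //                        // Find("b") keeps probing past S0 and reaches S1.
  //   map.Insert("c", 3);  // May reuse the tombstone at S0, but only after
  //                        // probing on until a free slot, to rule out that
  //                        // "c" already exists further down the chain.
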
  void Clear() {
    // Avoid trivial heap operations on zero-capacity std::move()-d objects.
    if (PERFETTO_UNLIKELY(capacity_ == 0))
      return;

    for (size_t i = 0; i < capacity_; ++i) {
      const uint8_t tag = tags_[i];
      if (tag != kFreeSlot && tag != kTombstone)
        EraseInternal(i);
    }
    // Clear all tombstones. This matters in particular for AppendOnly mode,
    // which must never observe tombstones.
    MaybeGrowAndRehash(/*grow=*/false);
  }

  Value& operator[](Key key) {
    auto it_and_inserted = Insert(std::move(key), Value{});
    return *it_and_inserted.first;
  }

  Iterator GetIterator() { return Iterator(this); }
  const Iterator GetIterator() const { return Iterator(this); }

  size_t size() const { return size_; }
  size_t capacity() const { return capacity_; }

  // "protected" here is only for flat_hash_map_benchmark.cc. Everything below
  // should be considered private.
 protected:
  enum ReservedTags : uint8_t { kFreeSlot = 0, kTombstone = 1 };
  static constexpr size_t kNotFound = std::numeric_limits<size_t>::max();

  size_t FindInternal(const Key& key) const {
    const size_t key_hash = Hasher{}(key);
    const uint8_t tag = HashToTag(key_hash);
    PERFETTO_DCHECK((capacity_ & (capacity_ - 1)) == 0);  // Must be a pow2.
    PERFETTO_DCHECK(max_probe_length_ <= capacity_);
    for (size_t i = 0; i < max_probe_length_; ++i) {
      const size_t idx = Probe::Calc(key_hash, i, capacity_);
      const uint8_t tag_idx = tags_[idx];

      if (tag_idx == kFreeSlot)
        return kNotFound;
      // HashToTag() never returns kTombstone, so the tag check below can never
      // match a tombstone. This is deliberate: tombstones must simply be
      // skipped while probing.
      if (tag_idx == tag && keys_[idx] == key) {
        PERFETTO_DCHECK(tag_idx > kTombstone);
        return idx;
      }
    }  // for (idx)
    return kNotFound;
  }

  void EraseInternal(size_t idx) {
    PERFETTO_DCHECK(tags_[idx] > kTombstone);
    PERFETTO_DCHECK(size_ > 0);
    tags_[idx] = kTombstone;
    keys_[idx].~Key();
    values_[idx].~Value();
    size_--;
  }

  PERFETTO_NO_INLINE void MaybeGrowAndRehash(bool grow) {
    PERFETTO_DCHECK(size_ <= capacity_);
    const size_t old_capacity = capacity_;

    // Grow quickly up to 1MB, then chill.
    const size_t old_size_bytes = old_capacity * (sizeof(Key) + sizeof(Value));
    const size_t grow_factor = old_size_bytes < (1024u * 1024u) ? 8 : 2;
    const size_t new_capacity =
        grow ? std::max(old_capacity * grow_factor, size_t(1024))
             : old_capacity;

    auto old_tags(std::move(tags_));
    auto old_keys(std::move(keys_));
    auto old_values(std::move(values_));
    size_t old_size = size_;

    // This must be a CHECK (i.e. not just a DCHECK) to prevent UAF attacks on
    // 32-bit archs that try to double the size of the table until wrapping.
    PERFETTO_CHECK(new_capacity >= old_capacity);
    Reset(new_capacity);

    size_t new_size = 0;  // Recompute the size.
    for (size_t i = 0; i < old_capacity; ++i) {
      const uint8_t old_tag = old_tags[i];
      if (old_tag != kFreeSlot && old_tag != kTombstone) {
        Insert(std::move(old_keys[i]), std::move(old_values[i]));
        old_keys[i].~Key();  // Destroy the old objects.
        old_values[i].~Value();
        new_size++;
      }
    }
    PERFETTO_DCHECK(new_size == old_size);
    size_ = new_size;
  }
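
  // A worked example of the growth policy above, assuming hypothetical entries
  // with sizeof(Key) + sizeof(Value) == 16: the table starts at the minimum of
  // 1024 slots (16 KB) and grows 8x while below the 1 MB threshold, then only
  // doubles: 1024 -> 8192 (128 KB) -> 65536 (1 MB) -> 131072 -> 262144 -> ...
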
  // Doesn't call destructors. Use Clear() for that.
  PERFETTO_NO_INLINE void Reset(size_t n) {
    PERFETTO_DCHECK((n & (n - 1)) == 0);  // Must be a pow2.

    capacity_ = n;
    max_probe_length_ = 0;
    size_ = 0;
    load_limit_ = n * static_cast<size_t>(load_limit_percent_) / 100;
    load_limit_ = std::min(load_limit_, n);

    tags_.reset(new uint8_t[n]);
    memset(&tags_[0], 0, n);                  // Clear all tags.
    keys_ = AlignedAllocTyped<Key[]>(n);      // Deliberately not 0-initialized.
    values_ = AlignedAllocTyped<Value[]>(n);  // Deliberately not 0-initialized.
  }

  static inline uint8_t HashToTag(size_t full_hash) {
    uint8_t tag = full_hash >> (sizeof(full_hash) * 8 - 8);
    // Ensure the tag is always >= 2. Tags 0 and 1 are reserved for kFreeSlot
    // and kTombstone.
    tag += (tag <= kTombstone) << 1;
    PERFETTO_DCHECK(tag > kTombstone);
    return tag;
  }

  size_t capacity_ = 0;
  size_t size_ = 0;
  size_t max_probe_length_ = 0;
  size_t load_limit_ = 0;  // Updated every time |capacity_| changes.
  int load_limit_percent_ =
      kDefaultLoadLimitPct;  // Load factor limit in % of |capacity_|.

  // These arrays always have |capacity_| elements.
  // Note: AlignedUniquePtr just allocates memory, doesn't invoke any ctor/dtor.
  std::unique_ptr<uint8_t[]> tags_;
  AlignedUniquePtr<Key[]> keys_;
  AlignedUniquePtr<Value[]> values_;
};

}  // namespace base
}  // namespace perfetto

#endif  // INCLUDE_PERFETTO_EXT_BASE_FLAT_HASH_MAP_H_