/*
 * Copyright (C) 2021 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef INCLUDE_PERFETTO_EXT_BASE_FLAT_HASH_MAP_H_
#define INCLUDE_PERFETTO_EXT_BASE_FLAT_HASH_MAP_H_

#include "perfetto/base/compiler.h"
#include "perfetto/base/logging.h"
#include "perfetto/ext/base/hash.h"
#include "perfetto/ext/base/utils.h"

#include <algorithm>
#include <limits>

namespace perfetto {
namespace base {

// An open-addressing hashmap implementation.
// Pointers are not stable, neither for keys nor values.
// Has performance similar to a RobinHood hash (without the complications) and
// roughly 2x that of std::unordered_map.
// Doc: http://go/perfetto-hashtables .
//
// When used to implement a string pool in TraceProcessor, the performance
// characteristics obtained by replaying the set of strings seen in a 4GB trace
// (226M strings, 1M unique) are the following (see flat_hash_map_benchmark.cc):
// This(Linear+AppendOnly)      879,383,676 ns    258.013M insertions/s
// This(LinearProbe):           909,206,047 ns    249.546M insertions/s
// This(QuadraticProbe):      1,083,844,388 ns    209.363M insertions/s
// std::unordered_map:        6,203,351,870 ns    36.5811M insertions/s
// tsl::robin_map:              931,403,397 ns    243.622M insertions/s
// absl::flat_hash_map:         998,013,459 ns    227.379M insertions/s
// FollyF14FastMap:           1,181,480,602 ns    192.074M insertions/s
//
// TODO(primiano): the table regresses for heavy insert+erase workloads, since
// we don't clean up tombstones outside of resizes. In the limit, the entire
// table's capacity is made up of values/tombstones, so each search has to
// exhaustively scan the full capacity.

// The structs below define the probing algorithm used to probe slots upon a
// collision. They are guaranteed to visit all slots, as our table size is
// always a power of two (see https://en.wikipedia.org/wiki/Quadratic_probing).
// A worked example of the probing sequence follows the three structs below.

// Linear probing can be faster if the hashing is well distributed and the load
// is not high. For TraceProcessor's StringPool this is the fastest. It can
// degenerate badly if the hashing doesn't spread (e.g., if using pids directly
// as keys, with a no-op hashing function).
struct LinearProbe {
  static inline size_t Calc(size_t key_hash, size_t step, size_t capacity) {
    return (key_hash + step) & (capacity - 1);  // Linear probe
  }
};

// Generates the sequence: 0, 3, 10, 21, 36, 55, ...
// Can be a bit (~5%) slower than LinearProbe because it's less cache hot, but
// avoids degenerating badly if the hash function is bad and causes clusters.
// A good default choice unless benchmarks prove otherwise.
struct QuadraticProbe {
  static inline size_t Calc(size_t key_hash, size_t step, size_t capacity) {
    return (key_hash + 2 * step * step + step) & (capacity - 1);
  }
};

// Tends to perform in the middle between linear and quadratic.
// It's a bit more cache-effective than the QuadraticProbe but can create more
// clustering if the hash function doesn't spread well.
// Generates the sequence: 0, 1, 3, 6, 10, 15, 21, ...
struct QuadraticHalfProbe {
  static inline size_t Calc(size_t key_hash, size_t step, size_t capacity) {
    return (key_hash + (step * step + step) / 2) & (capacity - 1);
  }
};
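
// Worked example (illustrative only, not part of this header): assume
// capacity = 8 and key_hash = 5. The map calls Probe::Calc() with an
// increasing |step| until it finds a free slot or the matching key. For
// QuadraticProbe the visited slots are:
//
//   QuadraticProbe::Calc(5, 0, 8) == 5  // (5 + 0)  & 7
//   QuadraticProbe::Calc(5, 1, 8) == 0  // (5 + 3)  & 7
//   QuadraticProbe::Calc(5, 2, 8) == 7  // (5 + 10) & 7
//   QuadraticProbe::Calc(5, 3, 8) == 2  // (5 + 21) & 7
//   ...                                 // steps 4..7 visit 1, 4, 3, 6
//
// i.e. all 8 slots are visited exactly once within |capacity| steps, which is
// what lets the search loops below terminate after at most |capacity_| probes.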

template <typename Key,
          typename Value,
          typename Hasher = base::Hash<Key>,
          typename Probe = QuadraticProbe,
          bool AppendOnly = false>
class FlatHashMap {
 public:
  class Iterator {
   public:
    explicit Iterator(const FlatHashMap* map) : map_(map) {
      FindNextNonFree();
    }
    ~Iterator() = default;
    Iterator(const Iterator&) = default;
    Iterator& operator=(const Iterator&) = default;
    Iterator(Iterator&&) noexcept = default;
    Iterator& operator=(Iterator&&) noexcept = default;

    Key& key() { return map_->keys_[idx_]; }
    Value& value() { return map_->values_[idx_]; }
    const Key& key() const { return map_->keys_[idx_]; }
    const Value& value() const { return map_->values_[idx_]; }

    explicit operator bool() const { return idx_ != kEnd; }
    Iterator& operator++() {
      PERFETTO_DCHECK(idx_ < map_->capacity_);
      ++idx_;
      FindNextNonFree();
      return *this;
    }

   private:
    static constexpr size_t kEnd = std::numeric_limits<size_t>::max();

    void FindNextNonFree() {
      const auto& tags = map_->tags_;
      for (; idx_ < map_->capacity_; idx_++) {
        if (tags[idx_] != kFreeSlot &&
            (AppendOnly || tags[idx_] != kTombstone))
          return;
      }
      idx_ = kEnd;
    }

    const FlatHashMap* map_ = nullptr;
    size_t idx_ = 0;
  };  // Iterator

  static constexpr int kDefaultLoadLimitPct = 75;
  explicit FlatHashMap(size_t initial_capacity = 0,
                       int load_limit_pct = kDefaultLoadLimitPct)
      : load_limit_percent_(load_limit_pct) {
    if (initial_capacity > 0)
      Reset(initial_capacity);
  }

  // We are calling Clear() so that the destructors for the inserted entries
  // are called (unless they are trivial, in which case it will be a no-op).
  ~FlatHashMap() { Clear(); }

  FlatHashMap(FlatHashMap&& other) noexcept {
    tags_ = std::move(other.tags_);
    keys_ = std::move(other.keys_);
    values_ = std::move(other.values_);
    capacity_ = other.capacity_;
    size_ = other.size_;
    max_probe_length_ = other.max_probe_length_;
    load_limit_ = other.load_limit_;
    load_limit_percent_ = other.load_limit_percent_;

    new (&other) FlatHashMap();
  }

  FlatHashMap& operator=(FlatHashMap&& other) noexcept {
    this->~FlatHashMap();
    new (this) FlatHashMap(std::move(other));
    return *this;
  }

  FlatHashMap(const FlatHashMap&) = delete;
  FlatHashMap& operator=(const FlatHashMap&) = delete;
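
  // Illustrative usage of the public API below (a sketch, not normative;
  // DoSomething() is a hypothetical placeholder):
  //
  //   FlatHashMap<uint64_t, std::string> map;
  //   auto res = map.Insert(42, "foo");  // res = {Value*, /*inserted=*/true}.
  //   std::string* val = map.Find(42);   // nullptr if the key is absent.
  //   map[43] = "bar";                   // operator[] default-constructs.
  //   for (auto it = map.GetIterator(); it; ++it)
  //     DoSomething(it.key(), it.value());
  //   map.Erase(42);                     // Not available if AppendOnly=true.
  //
  // Note that, as stated in the class comment, pointers returned by Insert()
  // and Find() are invalidated by any later insertion that grows the table.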

  std::pair<Value*, bool> Insert(Key key, Value value) {
    const size_t key_hash = Hasher{}(key);
    const uint8_t tag = HashToTag(key_hash);
    static constexpr size_t kSlotNotFound = std::numeric_limits<size_t>::max();

    // In reality this for loop does at most two attempts:
    // The first iteration either:
    //  - Early-returns, because the key exists already,
    //  - Finds an insertion slot and proceeds, because the load is < limit.
    // The second iteration is hit only in the unlikely case of this insertion
    // bringing the table beyond the target |load_limit_| (or the edge case of
    // the HT being full, if |load_limit_percent_| = 100).
    // We cannot simply pre-grow the table before insertion, because we must
    // guarantee that calling Insert() with a key that already exists doesn't
    // invalidate iterators.
    size_t insertion_slot;
    size_t probe_len;
    for (;;) {
      PERFETTO_DCHECK((capacity_ & (capacity_ - 1)) == 0);  // Must be a pow2.
      insertion_slot = kSlotNotFound;
      // Start the iteration at the desired slot (key_hash % capacity_),
      // searching either for a free slot or a tombstone. In the worst case we
      // might end up scanning the whole array of slots. The Probe functions
      // are guaranteed to visit all the slots within |capacity_| steps. If we
      // find a free slot we can stop the search immediately (a free slot acts
      // as an "end of chain" for entries having the same hash). If we find a
      // tombstone (a deleted slot) we remember its position, but have to keep
      // searching until we hit a free slot, to make sure we don't insert a
      // duplicate key.
      for (probe_len = 0; probe_len < capacity_;) {
        const size_t idx = Probe::Calc(key_hash, probe_len, capacity_);
        PERFETTO_DCHECK(idx < capacity_);
        const uint8_t tag_idx = tags_[idx];
        ++probe_len;
        if (tag_idx == kFreeSlot) {
          // Rationale for "insertion_slot == kSlotNotFound": if we encountered
          // a tombstone while iterating, we should reuse that rather than
          // taking another slot.
          if (AppendOnly || insertion_slot == kSlotNotFound)
            insertion_slot = idx;
          break;
        }
        // We should never encounter tombstones in AppendOnly mode.
        PERFETTO_DCHECK(!(tag_idx == kTombstone && AppendOnly));
        if (!AppendOnly && tag_idx == kTombstone) {
          insertion_slot = idx;
          continue;
        }
        if (tag_idx == tag && keys_[idx] == key) {
          // The key is already in the map.
          return std::make_pair(&values_[idx], false);
        }
      }  // for (idx)

      // If we got to this point the key does not exist (otherwise we would
      // have hit the return above) and we are going to insert a new entry.
      // Before doing so, ensure we stay under the target load limit.
      if (PERFETTO_UNLIKELY(size_ >= load_limit_)) {
        MaybeGrowAndRehash(/*grow=*/true);
        continue;
      }
      PERFETTO_DCHECK(insertion_slot != kSlotNotFound);
      break;
    }  // for (attempt)

    PERFETTO_CHECK(insertion_slot < capacity_);

    // We found a free slot (or a tombstone). Proceed with the insertion.
    Value* value_idx = &values_[insertion_slot];
    new (&keys_[insertion_slot]) Key(std::move(key));
    new (value_idx) Value(std::move(value));
    tags_[insertion_slot] = tag;
    PERFETTO_DCHECK(probe_len > 0 && probe_len <= capacity_);
    max_probe_length_ = std::max(max_probe_length_, probe_len);
    size_++;

    return std::make_pair(value_idx, true);
  }
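
  // Worked example of the load-limit check above (illustrative numbers,
  // assuming the default load_limit_pct of 75): with capacity_ == 1024,
  // load_limit_ == 1024 * 75 / 100 == 768. The insertion of a new key that
  // finds size_ >= 768 grows and rehashes the table first, then retries the
  // probe loop on the larger table (the "second iteration" described above).
  // An Insert() of a key that already exists early-returns before this check,
  // which is what preserves the no-rehash guarantee for existing keys.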

  Value* Find(const Key& key) const {
    const size_t idx = FindInternal(key);
    if (idx == kNotFound)
      return nullptr;
    return &values_[idx];
  }

  bool Erase(const Key& key) {
    if (AppendOnly)
      PERFETTO_FATAL("Erase() not supported because AppendOnly=true");
    size_t idx = FindInternal(key);
    if (idx == kNotFound)
      return false;
    EraseInternal(idx);
    return true;
  }

  void Clear() {
    // Avoid trivial heap operations on zero-capacity std::move()-d objects.
    if (PERFETTO_UNLIKELY(capacity_ == 0))
      return;

    for (size_t i = 0; i < capacity_; ++i) {
      const uint8_t tag = tags_[i];
      if (tag != kFreeSlot && tag != kTombstone)
        EraseInternal(i);
    }
    // Clear all tombstones. We really need to do this for AppendOnly.
    MaybeGrowAndRehash(/*grow=*/false);
  }

  Value& operator[](Key key) {
    auto it_and_inserted = Insert(std::move(key), Value{});
    return *it_and_inserted.first;
  }

  Iterator GetIterator() { return Iterator(this); }
  const Iterator GetIterator() const { return Iterator(this); }

  size_t size() const { return size_; }
  size_t capacity() const { return capacity_; }

  // "protected" here is only for the flat_hash_map_benchmark.cc. Everything
  // below is by all means private.
 protected:
  enum ReservedTags : uint8_t { kFreeSlot = 0, kTombstone = 1 };
  static constexpr size_t kNotFound = std::numeric_limits<size_t>::max();

  size_t FindInternal(const Key& key) const {
    const size_t key_hash = Hasher{}(key);
    const uint8_t tag = HashToTag(key_hash);
    PERFETTO_DCHECK((capacity_ & (capacity_ - 1)) == 0);  // Must be a pow2.
    PERFETTO_DCHECK(max_probe_length_ <= capacity_);
    for (size_t i = 0; i < max_probe_length_; ++i) {
      const size_t idx = Probe::Calc(key_hash, i, capacity_);
      const uint8_t tag_idx = tags_[idx];

      if (tag_idx == kFreeSlot)
        return kNotFound;
      // HashToTag() never returns kTombstone, so the tag-check below cannot
      // possibly match. Also we just want to skip tombstones.
      if (tag_idx == tag && keys_[idx] == key) {
        PERFETTO_DCHECK(tag_idx > kTombstone);
        return idx;
      }
    }  // for (idx)
    return kNotFound;
  }

  void EraseInternal(size_t idx) {
    PERFETTO_DCHECK(tags_[idx] > kTombstone);
    PERFETTO_DCHECK(size_ > 0);
    tags_[idx] = kTombstone;
    keys_[idx].~Key();
    values_[idx].~Value();
    size_--;
  }

  PERFETTO_NO_INLINE void MaybeGrowAndRehash(bool grow) {
    PERFETTO_DCHECK(size_ <= capacity_);
    const size_t old_capacity = capacity_;

    // Grow quickly up to 1MB, then chill.
    const size_t old_size_bytes = old_capacity * (sizeof(Key) + sizeof(Value));
    const size_t grow_factor = old_size_bytes < (1024u * 1024u) ? 8 : 2;
    const size_t new_capacity =
        grow ? std::max(old_capacity * grow_factor, size_t(1024))
             : old_capacity;

    auto old_tags(std::move(tags_));
    auto old_keys(std::move(keys_));
    auto old_values(std::move(values_));
    size_t old_size = size_;

    // This must be a CHECK (i.e. not just a DCHECK) to prevent UAF attacks on
    // 32-bit archs that try to double the size of the table until wrapping.
    PERFETTO_CHECK(new_capacity >= old_capacity);
    Reset(new_capacity);

    size_t new_size = 0;  // Recompute the size.
    for (size_t i = 0; i < old_capacity; ++i) {
      const uint8_t old_tag = old_tags[i];
      if (old_tag != kFreeSlot && old_tag != kTombstone) {
        Insert(std::move(old_keys[i]), std::move(old_values[i]));
        old_keys[i].~Key();  // Destroy the old objects.
        old_values[i].~Value();
        new_size++;
      }
    }
    PERFETTO_DCHECK(new_size == old_size);
    size_ = new_size;
  }
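
  // Worked example of the growth policy above (illustrative, assuming
  // Key = Value = uint64_t, i.e. 16 bytes per slot, and a map constructed
  // with the default initial_capacity of 0): the first growth jumps straight
  // to max(0 * 8, 1024) == 1024 slots. Later growths use a factor of 8 while
  // the table is under 1MB (1024 -> 8192 -> 65536 slots) and a factor of 2
  // afterwards (65536 slots * 16 bytes == 1MB, so the next step is 131072).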

  // Doesn't call destructors. Use Clear() for that.
  PERFETTO_NO_INLINE void Reset(size_t n) {
    PERFETTO_DCHECK((n & (n - 1)) == 0);  // Must be a pow2.

    capacity_ = n;
    max_probe_length_ = 0;
    size_ = 0;
    load_limit_ = n * static_cast<size_t>(load_limit_percent_) / 100;
    load_limit_ = std::min(load_limit_, n);

    tags_.reset(new uint8_t[n]);
    memset(&tags_[0], 0, n);  // Clear all tags.
    keys_ = AlignedAllocTyped<Key[]>(n);      // Deliberately not 0-initialized.
    values_ = AlignedAllocTyped<Value[]>(n);  // Deliberately not 0-initialized.
  }

  static inline uint8_t HashToTag(size_t full_hash) {
    uint8_t tag = full_hash >> (sizeof(full_hash) * 8 - 8);
    // Ensure the tag is always >= 2. We use 0, 1 for kFreeSlot and kTombstone.
    tag += (tag <= kTombstone) << 1;
    PERFETTO_DCHECK(tag > kTombstone);
    return tag;
  }

  size_t capacity_ = 0;
  size_t size_ = 0;
  size_t max_probe_length_ = 0;
  size_t load_limit_ = 0;  // Updated every time |capacity_| changes.
  int load_limit_percent_ =
      kDefaultLoadLimitPct;  // Load factor limit in % of |capacity_|.

  // These arrays always have |capacity_| elements.
  // Note: AlignedUniquePtr just allocates memory, doesn't invoke any ctor/dtor.
  std::unique_ptr<uint8_t[]> tags_;
  AlignedUniquePtr<Key[]> keys_;
  AlignedUniquePtr<Value[]> values_;
};

}  // namespace base
}  // namespace perfetto

#endif  // INCLUDE_PERFETTO_EXT_BASE_FLAT_HASH_MAP_H_