/*
 * Copyright (C) 2021 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef INCLUDE_PERFETTO_EXT_BASE_FLAT_HASH_MAP_H_
#define INCLUDE_PERFETTO_EXT_BASE_FLAT_HASH_MAP_H_

#include "perfetto/base/compiler.h"
#include "perfetto/base/logging.h"
#include "perfetto/ext/base/hash.h"
#include "perfetto/ext/base/utils.h"

#include <algorithm>
#include <functional>
#include <limits>

namespace perfetto {
namespace base {

// An open-addressing hashmap implementation.
// Pointers are not stable, neither for keys nor for values.
// Has performance similar to a Robin Hood hash (without the complications) and
// roughly 2x that of std::unordered_map.
// Doc: http://go/perfetto-hashtables .
//
// When used to implement a string pool in TraceProcessor, the performance
// characteristics obtained by replaying the set of strings seen in a 4GB trace
// (226M strings, 1M unique) are the following (see flat_hash_map_benchmark.cc):
// This(Linear+AppendOnly)    879,383,676 ns    258.013M insertions/s
// This(LinearProbe):         909,206,047 ns    249.546M insertions/s
// This(QuadraticProbe):    1,083,844,388 ns    209.363M insertions/s
// std::unordered_map:      6,203,351,870 ns    36.5811M insertions/s
// tsl::robin_map:            931,403,397 ns    243.622M insertions/s
// absl::flat_hash_map:       998,013,459 ns    227.379M insertions/s
// FollyF14FastMap:         1,181,480,602 ns    192.074M insertions/s

// The structs below define the probing algorithm used to resolve slot
// collisions. They are guaranteed to visit all slots, as our table size is
// always a power of two (see https://en.wikipedia.org/wiki/Quadratic_probing).

// Linear probing can be faster if the hashing is well distributed and the load
// is not high. For TraceProcessor's StringPool this is the fastest. It can
// degenerate badly if the hashing doesn't spread the keys (e.g., when using
// pids directly as keys with a no-op hashing function).
struct LinearProbe {
  static inline size_t Calc(size_t key_hash, size_t step, size_t capacity) {
    return (key_hash + step) & (capacity - 1);  // Linear probe
  }
};

// Generates the sequence: 0, 3, 10, 21, 36, 55, ...
// Can be a bit (~5%) slower than LinearProbe because it's less cache hot, but
// avoids degenerating badly if the hash function is bad and causes clusters.
// A good default choice unless benchmarks prove otherwise.
struct QuadraticProbe {
  static inline size_t Calc(size_t key_hash, size_t step, size_t capacity) {
    return (key_hash + 2 * step * step + step) & (capacity - 1);
  }
};

// Tends to perform in the middle between linear and quadratic.
// It's a bit more cache-effective than QuadraticProbe but can create more
// clustering if the hash function doesn't spread well.
// Generates the sequence: 0, 1, 3, 6, 10, 15, 21, ...
struct QuadraticHalfProbe {
  static inline size_t Calc(size_t key_hash, size_t step, size_t capacity) {
    return (key_hash + (step * step + step) / 2) & (capacity - 1);
  }
};
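
// Illustrative example (not part of the implementation): with capacity = 8 and
// key_hash = 5, the three probes visit the slots in the following order, each
// covering all 8 slots within 8 steps:
//   LinearProbe:        5, 6, 7, 0, 1, 2, 3, 4
//   QuadraticProbe:     5, 0, 7, 2, 1, 4, 3, 6   // (5 + {0, 3, 10, 21, ...}) & 7
//   QuadraticHalfProbe: 5, 6, 0, 3, 7, 4, 2, 1   // (5 + {0, 1, 3, 6, ...}) & 7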

template <typename Key,
          typename Value,
          typename Hasher = base::Hash<Key>,
          typename Probe = QuadraticProbe,
          bool AppendOnly = false>
class FlatHashMap {
 public:
  class Iterator {
   public:
    explicit Iterator(const FlatHashMap* map) : map_(map) { FindNextNonFree(); }
    ~Iterator() = default;
    Iterator(const Iterator&) = default;
    Iterator& operator=(const Iterator&) = default;
    Iterator(Iterator&&) noexcept = default;
    Iterator& operator=(Iterator&&) noexcept = default;

    Key& key() { return map_->keys_[idx_]; }
    Value& value() { return map_->values_[idx_]; }
    const Key& key() const { return map_->keys_[idx_]; }
    const Value& value() const { return map_->values_[idx_]; }

    explicit operator bool() const { return idx_ != kEnd; }
    Iterator& operator++() {
      PERFETTO_DCHECK(idx_ < map_->capacity_);
      ++idx_;
      FindNextNonFree();
      return *this;
    }

   private:
    static constexpr size_t kEnd = std::numeric_limits<size_t>::max();

    void FindNextNonFree() {
      const auto& tags = map_->tags_;
      for (; idx_ < map_->capacity_; idx_++) {
        if (tags[idx_] != kFreeSlot && (AppendOnly || tags[idx_] != kTombstone))
          return;
      }
      idx_ = kEnd;
    }

    const FlatHashMap* map_ = nullptr;
    size_t idx_ = 0;
  };  // Iterator

  static constexpr int kDefaultLoadLimitPct = 75;
  explicit FlatHashMap(size_t initial_capacity = 0,
                       int load_limit_pct = kDefaultLoadLimitPct)
      : load_limit_percent_(load_limit_pct) {
    if (initial_capacity > 0)
      Reset(initial_capacity);
  }

  // We are calling Clear() so that the destructors for the inserted entries
  // are called (unless they are trivial, in which case it will be a no-op).
  ~FlatHashMap() { Clear(); }

  FlatHashMap(FlatHashMap&& other) noexcept {
    tags_ = std::move(other.tags_);
    keys_ = std::move(other.keys_);
    values_ = std::move(other.values_);
    capacity_ = other.capacity_;
    size_ = other.size_;
    max_probe_length_ = other.max_probe_length_;
    load_limit_ = other.load_limit_;
    load_limit_percent_ = other.load_limit_percent_;

    new (&other) FlatHashMap();
  }

  FlatHashMap& operator=(FlatHashMap&& other) noexcept {
    this->~FlatHashMap();
    new (this) FlatHashMap(std::move(other));
    return *this;
  }

  FlatHashMap(const FlatHashMap&) = delete;
  FlatHashMap& operator=(const FlatHashMap&) = delete;
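
  // Typical usage (illustrative sketch; UseKeyAndValue is a hypothetical
  // caller-side function):
  //
  //   FlatHashMap<uint64_t, std::string> map;
  //   auto res = map.Insert(42, "foo");  // res = {pointer to value, true}.
  //   res = map.Insert(42, "bar");       // Key exists: res.second == false and
  //                                      // the stored value stays "foo".
  //   std::string* val = map.Find(42);   // nullptr if the key is absent.
  //   map.Erase(42);                     // Returns false if the key is absent.
  //   for (auto it = map.GetIterator(); it; ++it)
  //     UseKeyAndValue(it.key(), it.value());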

  std::pair<Value*, bool> Insert(Key key, Value value) {
    const size_t key_hash = Hasher{}(key);
    const uint8_t tag = HashToTag(key_hash);
    static constexpr size_t kSlotNotFound = std::numeric_limits<size_t>::max();

    // This for loop does in reality at most two attempts:
    // The first iteration either:
    // - Early-returns, because the key exists already, or
    // - Finds an insertion slot and proceeds, because the load is < limit.
    // The second iteration is only hit in the unlikely case of this insertion
    // bringing the table beyond the target |load_limit_| (or the edge case of
    // the HT being full, if |load_limit_percent_| = 100).
    // We cannot simply pre-grow the table before insertion, because we must
    // guarantee that calling Insert() with a key that already exists doesn't
    // invalidate iterators.
    size_t insertion_slot;
    size_t probe_len;
    for (;;) {
      PERFETTO_DCHECK((capacity_ & (capacity_ - 1)) == 0);  // Must be a pow2.
      insertion_slot = kSlotNotFound;
      // Start the iteration at the desired slot (key_hash % capacity_),
      // searching either for a free slot or a tombstone. In the worst case we
      // might end up scanning the whole array of slots. The Probe functions
      // are guaranteed to visit all the slots within |capacity_| steps. If we
      // find a free slot we can stop the search immediately (a free slot acts
      // as an "end of chain" for entries having the same hash). If we find a
      // tombstone (a deleted slot) we remember its position, but we have to
      // keep searching until a free slot is found, to make sure we don't
      // insert a duplicate key.
      for (probe_len = 0; probe_len < capacity_;) {
        const size_t idx = Probe::Calc(key_hash, probe_len, capacity_);
        PERFETTO_DCHECK(idx < capacity_);
        const uint8_t tag_idx = tags_[idx];
        ++probe_len;
        if (tag_idx == kFreeSlot) {
          // Rationale for "insertion_slot == kSlotNotFound": if we encountered
          // a tombstone while iterating we should reuse that rather than
          // taking another slot.
          if (AppendOnly || insertion_slot == kSlotNotFound)
            insertion_slot = idx;
          break;
        }
        // We should never encounter tombstones in AppendOnly mode.
        PERFETTO_DCHECK(!(tag_idx == kTombstone && AppendOnly));
        if (!AppendOnly && tag_idx == kTombstone) {
          insertion_slot = idx;
          continue;
        }
        if (tag_idx == tag && keys_[idx] == key) {
          // The key is already in the map.
          return std::make_pair(&values_[idx], false);
        }
      }  // for (idx)

      // If we got to this point the key does not exist (otherwise we would
      // have hit the return above) and we are going to insert a new entry.
      // Before doing so, ensure we stay under the target load limit.
      if (PERFETTO_UNLIKELY(size_ >= load_limit_)) {
        MaybeGrowAndRehash(/*grow=*/true);
        continue;
      }
      PERFETTO_DCHECK(insertion_slot != kSlotNotFound);
      break;
    }  // for (attempt)

    PERFETTO_CHECK(insertion_slot < capacity_);

    // We found a free slot (or a tombstone). Proceed with the insertion.
    Value* value_idx = &values_[insertion_slot];
    new (&keys_[insertion_slot]) Key(std::move(key));
    new (value_idx) Value(std::move(value));
    tags_[insertion_slot] = tag;
    PERFETTO_DCHECK(probe_len > 0 && probe_len <= capacity_);
    max_probe_length_ = std::max(max_probe_length_, probe_len);
    size_++;

    return std::make_pair(value_idx, true);
  }

  Value* Find(const Key& key) const {
    const size_t idx = FindInternal(key);
    if (idx == kNotFound)
      return nullptr;
    return &values_[idx];
  }

  bool Erase(const Key& key) {
    if (AppendOnly)
      PERFETTO_FATAL("Erase() not supported because AppendOnly=true");
    size_t idx = FindInternal(key);
    if (idx == kNotFound)
      return false;
    EraseInternal(idx);
    return true;
  }
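
  // Illustrative sketch of the tombstone lifecycle (hypothetical keys/values):
  //
  //   FlatHashMap<int, int> m;
  //   m.Insert(1, 10);  // The slot's tag becomes HashToTag(hash), size_ == 1.
  //   m.Erase(1);       // The tag becomes kTombstone, key/value are destroyed,
  //                     // size_ == 0; the slot does not terminate probe chains
  //                     // (unlike a free slot).
  //   m.Insert(5, 50);  // May reuse the tombstone slot if key 5's probe
  //                     // sequence walks through it; Find() skips tombstones.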

  void Clear() {
    // Avoid trivial heap operations on zero-capacity std::move()-d objects.
    if (PERFETTO_UNLIKELY(capacity_ == 0))
      return;

    for (size_t i = 0; i < capacity_; ++i) {
      const uint8_t tag = tags_[i];
      if (tag != kFreeSlot && tag != kTombstone)
        EraseInternal(i);
    }
    // Clear all tombstones. This is needed especially for AppendOnly, where
    // Insert() assumes it never encounters tombstones.
    MaybeGrowAndRehash(/*grow=*/false);
  }

  Value& operator[](Key key) {
    auto it_and_inserted = Insert(std::move(key), Value{});
    return *it_and_inserted.first;
  }

  Iterator GetIterator() { return Iterator(this); }
  const Iterator GetIterator() const { return Iterator(this); }

  size_t size() const { return size_; }
  size_t capacity() const { return capacity_; }

  // "protected" here is only for the flat_hash_map_benchmark.cc. Everything
  // below is by all means private.
 protected:
  enum ReservedTags : uint8_t { kFreeSlot = 0, kTombstone = 1 };
  static constexpr size_t kNotFound = std::numeric_limits<size_t>::max();

  size_t FindInternal(const Key& key) const {
    const size_t key_hash = Hasher{}(key);
    const uint8_t tag = HashToTag(key_hash);
    PERFETTO_DCHECK((capacity_ & (capacity_ - 1)) == 0);  // Must be a pow2.
    PERFETTO_DCHECK(max_probe_length_ <= capacity_);
    for (size_t i = 0; i < max_probe_length_; ++i) {
      const size_t idx = Probe::Calc(key_hash, i, capacity_);
      const uint8_t tag_idx = tags_[idx];

      if (tag_idx == kFreeSlot)
        return kNotFound;
      // HashToTag() never returns kTombstone, so the tag check below can never
      // match a tombstone slot; tombstones are simply skipped, as intended.
      if (tag_idx == tag && keys_[idx] == key) {
        PERFETTO_DCHECK(tag_idx > kTombstone);
        return idx;
      }
    }  // for (idx)
    return kNotFound;
  }

  void EraseInternal(size_t idx) {
    PERFETTO_DCHECK(tags_[idx] > kTombstone);
    PERFETTO_DCHECK(size_ > 0);
    tags_[idx] = kTombstone;
    keys_[idx].~Key();
    values_[idx].~Value();
    size_--;
  }

  PERFETTO_NO_INLINE void MaybeGrowAndRehash(bool grow) {
    PERFETTO_DCHECK(size_ <= capacity_);
    const size_t old_capacity = capacity_;

    // Grow quickly up to 1MB, then chill.
    const size_t old_size_bytes = old_capacity * (sizeof(Key) + sizeof(Value));
    const size_t grow_factor = old_size_bytes < (1024u * 1024u) ? 8 : 2;
    const size_t new_capacity =
        grow ? std::max(old_capacity * grow_factor, size_t(1024))
             : old_capacity;

    auto old_tags(std::move(tags_));
    auto old_keys(std::move(keys_));
    auto old_values(std::move(values_));
    size_t old_size = size_;

    // This must be a CHECK (i.e. not just a DCHECK) to prevent UAF attacks on
    // 32-bit archs that try to double the size of the table until wrapping.
    PERFETTO_CHECK(new_capacity >= old_capacity);
    Reset(new_capacity);

    size_t new_size = 0;  // Recompute the size.
    for (size_t i = 0; i < old_capacity; ++i) {
      const uint8_t old_tag = old_tags[i];
      if (old_tag != kFreeSlot && old_tag != kTombstone) {
        Insert(std::move(old_keys[i]), std::move(old_values[i]));
        old_keys[i].~Key();  // Destroy the old objects.
        old_values[i].~Value();
        new_size++;
      }
    }
    PERFETTO_DCHECK(new_size == old_size);
    size_ = new_size;
  }
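
  // Worked example of the growth policy above (illustrative numbers): with
  // sizeof(Key) + sizeof(Value) == 16 and the default 75% load limit, a table
  // with capacity_ == 1024 (16 KB of keys + values) has load_limit_ == 768 and
  // grows 8x to 8192 slots on the first Insert() that finds size_ >= 768; once
  // keys + values reach 1 MB, the growth factor drops to 2x.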

  // Doesn't call destructors. Use Clear() for that.
  PERFETTO_NO_INLINE void Reset(size_t n) {
    PERFETTO_DCHECK((n & (n - 1)) == 0);  // Must be a pow2.

    capacity_ = n;
    max_probe_length_ = 0;
    size_ = 0;
    load_limit_ = n * static_cast<size_t>(load_limit_percent_) / 100;
    load_limit_ = std::min(load_limit_, n);

    tags_.reset(new uint8_t[n]);
    memset(&tags_[0], 0, n);  // Clear all tags.
    keys_ = AlignedAllocTyped<Key[]>(n);      // Deliberately not 0-initialized.
    values_ = AlignedAllocTyped<Value[]>(n);  // Deliberately not 0-initialized.
  }

  static inline uint8_t HashToTag(size_t full_hash) {
    uint8_t tag = full_hash >> (sizeof(full_hash) * 8 - 8);
    // Ensure the tag is always >= 2. We use 0, 1 for kFreeSlot and kTombstone.
    tag += (tag <= kTombstone) << 1;
    PERFETTO_DCHECK(tag > kTombstone);
    return tag;
  }

  size_t capacity_ = 0;
  size_t size_ = 0;
  size_t max_probe_length_ = 0;
  size_t load_limit_ = 0;  // Updated every time |capacity_| changes.
  int load_limit_percent_ =
      kDefaultLoadLimitPct;  // Load factor limit in % of |capacity_|.

  // These arrays always have |capacity_| elements.
  // Note: AlignedUniquePtr just allocates memory, doesn't invoke any ctor/dtor.
  std::unique_ptr<uint8_t[]> tags_;
  AlignedUniquePtr<Key[]> keys_;
  AlignedUniquePtr<Value[]> values_;
};

}  // namespace base
}  // namespace perfetto

#endif  // INCLUDE_PERFETTO_EXT_BASE_FLAT_HASH_MAP_H_