1 // Copyright 2017 the V8 project authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef V8_COMPILER_PERSISTENT_MAP_H_ 6 #define V8_COMPILER_PERSISTENT_MAP_H_ 7 8 #include <array> 9 #include <tuple> 10 11 #include "src/base/functional.h" 12 #include "src/zone/zone-containers.h" 13 14 namespace v8 { 15 namespace internal { 16 namespace compiler { 17 18 // PersistentMap is a persistent map datastructure based on hash trees (a binary 19 // tree using the bits of a hash value as addresses). The map is a conceptually 20 // infinite: All keys are initially mapped to a default value, values are 21 // deleted by overwriting them with the default value. The iterators produce 22 // exactly the keys that are not the default value. The hash values should have 23 // high variance in their high bits, so dense integers are a bad choice. 24 // Complexity: 25 // - Copy and assignment: O(1) 26 // - access: O(log n) 27 // - update: O(log n) time and space 28 // - iteration: amortized O(1) per step 29 // - Zip: O(n) 30 // - equality check: O(n) 31 // TODO(tebbi): Cache map transitions to avoid re-allocation of the same map. 32 // TODO(tebbi): Implement an O(1) equality check based on hash consing or 33 // something similar. 34 template <class Key, class Value, class Hasher = base::hash<Key>> 35 class PersistentMap { 36 public: 37 using key_type = Key; 38 using mapped_type = Value; 39 using value_type = std::pair<Key, Value>; 40 41 private: 42 static constexpr size_t kHashBits = 32; 43 enum Bit : int { kLeft = 0, kRight = 1 }; 44 45 // Access hash bits starting from the high bits and compare them according to 46 // their unsigned value. This way, the order in the hash tree is compatible 47 // with numeric hash comparisons. 48 class HashValue; 49 50 struct KeyValue : std::pair<Key, Value> { keyKeyValue51 const Key& key() const { return this->first; } valueKeyValue52 const Value& value() const { return this->second; } 53 using std::pair<Key, Value>::pair; 54 }; 55 56 struct FocusedTree; 57 58 public: 59 // Depth of the last added element. This is a cheap estimate for the size of 60 // the hash tree. last_depth()61 size_t last_depth() const { 62 if (tree_) { 63 return tree_->length; 64 } else { 65 return 0; 66 } 67 } 68 Get(const Key & key)69 const Value& Get(const Key& key) const { 70 HashValue key_hash = HashValue(Hasher()(key)); 71 const FocusedTree* tree = FindHash(key_hash); 72 return GetFocusedValue(tree, key); 73 } 74 75 // Add or overwrite an existing key-value pair. 76 void Set(Key key, Value value); 77 78 bool operator==(const PersistentMap& other) const { 79 if (tree_ == other.tree_) return true; 80 if (def_value_ != other.def_value_) return false; 81 for (const std::tuple<Key, Value, Value>& triple : Zip(other)) { 82 if (std::get<1>(triple) != std::get<2>(triple)) return false; 83 } 84 return true; 85 } 86 87 bool operator!=(const PersistentMap& other) const { 88 return !(*this == other); 89 } 90 91 // The iterator produces key-value pairs in the lexicographical order of 92 // hash value and key. It produces exactly the key-value pairs where the value 93 // is not the default value. 94 class iterator; 95 begin()96 iterator begin() const { 97 if (!tree_) return end(); 98 return iterator::begin(tree_, def_value_); 99 } end()100 iterator end() const { return iterator::end(def_value_); } 101 102 // Iterator to traverse two maps in lockstep, producing matching value pairs 103 // for each key where at least one value is different from the respective 104 // default. 105 class double_iterator; 106 107 // An iterable to iterate over the two maps in lockstep. 108 struct ZipIterable { 109 PersistentMap a; 110 PersistentMap b; beginZipIterable111 double_iterator begin() { return double_iterator(a.begin(), b.begin()); } endZipIterable112 double_iterator end() { return double_iterator(a.end(), b.end()); } 113 }; 114 Zip(const PersistentMap & other)115 ZipIterable Zip(const PersistentMap& other) const { return {*this, other}; } 116 117 explicit PersistentMap(Zone* zone, Value def_value = Value()) PersistentMap(nullptr,zone,def_value)118 : PersistentMap(nullptr, zone, def_value) {} 119 120 private: 121 // Find the {FocusedTree} that contains a key-value pair with key hash {hash}. 122 const FocusedTree* FindHash(HashValue hash) const; 123 124 // Find the {FocusedTree} that contains a key-value pair with key hash {hash}. 125 // Output the path to this {FocusedTree} and its length. If no such 126 // {FocusedTree} exists, return {nullptr} and output the path to the last node 127 // with a matching hash prefix. Note that {length} is the length of the found 128 // path and may be less than the length of the found {FocusedTree}. 129 const FocusedTree* FindHash(HashValue hash, 130 std::array<const FocusedTree*, kHashBits>* path, 131 int* length) const; 132 133 // Load value from the leaf node on the focused path of {tree}. 134 const Value& GetFocusedValue(const FocusedTree* tree, const Key& key) const; 135 136 // Return the {FocusedTree} representing the left (bit==kLeft) or right 137 // (bit==kRight) child of the node on the path of {tree} at tree level 138 // {level}. 139 static const FocusedTree* GetChild(const FocusedTree* tree, int level, 140 Bit bit); 141 142 // Find the leftmost path in the tree, starting at the node at tree level 143 // {level} on the path of {start}. Output the level of the leaf to {level} and 144 // the path to {path}. 145 static const FocusedTree* FindLeftmost( 146 const FocusedTree* start, int* level, 147 std::array<const FocusedTree*, kHashBits>* path); 148 PersistentMap(const FocusedTree * tree,Zone * zone,Value def_value)149 PersistentMap(const FocusedTree* tree, Zone* zone, Value def_value) 150 : tree_(tree), def_value_(def_value), zone_(zone) {} 151 152 const FocusedTree* tree_; 153 Value def_value_; 154 Zone* zone_; 155 }; 156 157 // This structure represents a hash tree with one focused path to a specific 158 // leaf. For the focused leaf, it stores key, value and key hash. The path is 159 // defined by the hash bits of the focused leaf. In a traditional tree 160 // datastructure, the nodes of a path form a linked list with the values being 161 // the pointers outside of this path. Instead of storing all of these nodes, 162 // we store an array of the pointers pointing outside of the path. This is 163 // similar to the stack used when doing DFS traversal of a tree. The hash of 164 // the leaf is used to know if the pointers point to the left or the 165 // right of the path. As there is no explicit representation of a tree node, 166 // this structure also represents all the nodes on its path. The intended node 167 // depends on the tree depth, which is always clear from the referencing 168 // context. So the pointer to a {FocusedTree} stored in the 169 // {PersistentMap.tree_} always references the root, while a pointer from a 170 // focused node of another {FocusedTree} always references to one tree level 171 // lower than before. 172 template <class Key, class Value, class Hasher> 173 struct PersistentMap<Key, Value, Hasher>::FocusedTree { 174 KeyValue key_value; 175 // The depth of the focused path, that is, the number of pointers stored in 176 // this structure. 177 int8_t length; 178 HashValue key_hash; 179 // Out-of-line storage for hash collisions. 180 const ZoneMap<Key, Value>* more; 181 using more_iterator = typename ZoneMap<Key, Value>::const_iterator; 182 // {path_array} has to be the last member: To store an array inline, we 183 // over-allocate memory for this structure and access memory beyond 184 // {path_array}. This corresponds to a flexible array member as defined in 185 // C99. 186 const FocusedTree* path_array[1]; 187 const FocusedTree*& path(int i) { 188 DCHECK(i < length); 189 return reinterpret_cast<const FocusedTree**>( 190 reinterpret_cast<byte*>(this) + offsetof(FocusedTree, path_array))[i]; 191 } 192 const FocusedTree* path(int i) const { 193 DCHECK(i < length); 194 return reinterpret_cast<const FocusedTree* const*>( 195 reinterpret_cast<const byte*>(this) + 196 offsetof(FocusedTree, path_array))[i]; 197 } 198 }; 199 200 template <class Key, class Value, class Hasher> 201 class PersistentMap<Key, Value, Hasher>::HashValue { 202 public: 203 explicit HashValue(size_t hash) : bits_(static_cast<uint32_t>(hash)) {} 204 205 Bit operator[](int pos) const { 206 DCHECK_LT(pos, kHashBits); 207 return bits_ & (static_cast<decltype(bits_)>(1) << (kHashBits - pos - 1)) 208 ? kRight 209 : kLeft; 210 } 211 212 bool operator<(HashValue other) const { return bits_ < other.bits_; } 213 bool operator==(HashValue other) const { return bits_ == other.bits_; } 214 bool operator!=(HashValue other) const { return bits_ != other.bits_; } 215 HashValue operator^(HashValue other) const { 216 return HashValue(bits_ ^ other.bits_); 217 } 218 219 private: 220 static_assert(sizeof(uint32_t) * 8 == kHashBits, "wrong type for bits_"); 221 uint32_t bits_; 222 }; 223 224 template <class Key, class Value, class Hasher> 225 class PersistentMap<Key, Value, Hasher>::iterator { 226 public: 227 const value_type operator*() const { 228 if (current_->more) { 229 return *more_iter_; 230 } else { 231 return current_->key_value; 232 } 233 } 234 235 iterator& operator++() { 236 do { 237 if (!current_) { 238 // Iterator is past the end. 239 return *this; 240 } 241 if (current_->more) { 242 DCHECK(more_iter_ != current_->more->end()); 243 ++more_iter_; 244 if (more_iter_ != current_->more->end()) return *this; 245 } 246 if (level_ == 0) { 247 *this = end(def_value_); 248 return *this; 249 } 250 --level_; 251 while (current_->key_hash[level_] == kRight || path_[level_] == nullptr) { 252 if (level_ == 0) { 253 *this = end(def_value_); 254 return *this; 255 } 256 --level_; 257 } 258 const FocusedTree* first_right_alternative = path_[level_]; 259 level_++; 260 current_ = FindLeftmost(first_right_alternative, &level_, &path_); 261 if (current_->more) { 262 more_iter_ = current_->more->begin(); 263 } 264 } while (!((**this).second != def_value())); 265 return *this; 266 } 267 268 bool operator==(const iterator& other) const { 269 if (is_end()) return other.is_end(); 270 if (other.is_end()) return false; 271 if (current_->key_hash != other.current_->key_hash) { 272 return false; 273 } else { 274 return (**this).first == (*other).first; 275 } 276 } 277 bool operator!=(const iterator& other) const { return !(*this == other); } 278 279 bool operator<(const iterator& other) const { 280 if (is_end()) return false; 281 if (other.is_end()) return true; 282 if (current_->key_hash == other.current_->key_hash) { 283 return (**this).first < (*other).first; 284 } else { 285 return current_->key_hash < other.current_->key_hash; 286 } 287 } 288 289 bool is_end() const { return current_ == nullptr; } 290 291 const Value& def_value() { return def_value_; } 292 293 static iterator begin(const FocusedTree* tree, Value def_value) { 294 iterator i(def_value); 295 i.current_ = FindLeftmost(tree, &i.level_, &i.path_); 296 if (i.current_->more) { 297 i.more_iter_ = i.current_->more->begin(); 298 } 299 // Skip entries with default value. PersistentMap iterators must never point 300 // to a default value. 301 while (!i.is_end() && !((*i).second != def_value)) ++i; 302 return i; 303 } 304 305 static iterator end(Value def_value) { return iterator(def_value); } 306 307 private: 308 int level_; 309 typename FocusedTree::more_iterator more_iter_; 310 const FocusedTree* current_; 311 std::array<const FocusedTree*, kHashBits> path_; 312 Value def_value_; 313 314 explicit iterator(Value def_value) 315 : level_(0), current_(nullptr), def_value_(def_value) {} 316 }; 317 318 template <class Key, class Value, class Hasher> 319 class PersistentMap<Key, Value, Hasher>::double_iterator { 320 public: 321 std::tuple<Key, Value, Value> operator*() { 322 if (first_current_) { 323 auto pair = *first_; 324 return std::make_tuple( 325 pair.first, pair.second, 326 second_current_ ? (*second_).second : second_.def_value()); 327 } else { 328 DCHECK(second_current_); 329 auto pair = *second_; 330 return std::make_tuple(pair.first, first_.def_value(), pair.second); 331 } 332 } 333 334 double_iterator& operator++() { 335 #ifdef DEBUG 336 iterator old_first = first_; 337 iterator old_second = second_; 338 #endif 339 if (first_current_) { 340 ++first_; 341 DCHECK(old_first < first_); 342 } 343 if (second_current_) { 344 ++second_; 345 DCHECK(old_second < second_); 346 } 347 return *this = double_iterator(first_, second_); 348 } 349 350 double_iterator(iterator first, iterator second) 351 : first_(first), second_(second) { 352 if (first_ == second_) { 353 first_current_ = second_current_ = true; 354 } else if (first_ < second_) { 355 first_current_ = true; 356 second_current_ = false; 357 } else { 358 DCHECK(second_ < first_); 359 first_current_ = false; 360 second_current_ = true; 361 } 362 } 363 364 bool operator!=(const double_iterator& other) { 365 return first_ != other.first_ || second_ != other.second_; 366 } 367 368 bool is_end() const { return first_.is_end() && second_.is_end(); } 369 370 private: 371 iterator first_; 372 iterator second_; 373 bool first_current_; 374 bool second_current_; 375 }; 376 377 template <class Key, class Value, class Hasher> 378 void PersistentMap<Key, Value, Hasher>::Set(Key key, Value value) { 379 HashValue key_hash = HashValue(Hasher()(key)); 380 std::array<const FocusedTree*, kHashBits> path; 381 int length = 0; 382 const FocusedTree* old = FindHash(key_hash, &path, &length); 383 ZoneMap<Key, Value>* more = nullptr; 384 if (!(GetFocusedValue(old, key) != value)) return; 385 if (old && !(old->more == nullptr && old->key_value.key() == key)) { 386 more = new (zone_->New(sizeof(*more))) ZoneMap<Key, Value>(zone_); 387 if (old->more) { 388 *more = *old->more; 389 } else { 390 (*more)[old->key_value.key()] = old->key_value.value(); 391 } 392 (*more)[key] = value; 393 } 394 FocusedTree* tree = 395 new (zone_->New(sizeof(FocusedTree) + 396 std::max(0, length - 1) * sizeof(const FocusedTree*))) 397 FocusedTree{KeyValue(std::move(key), std::move(value)), 398 static_cast<int8_t>(length), 399 key_hash, 400 more, 401 {}}; 402 for (int i = 0; i < length; ++i) { 403 tree->path(i) = path[i]; 404 } 405 *this = PersistentMap(tree, zone_, def_value_); 406 } 407 408 template <class Key, class Value, class Hasher> 409 const typename PersistentMap<Key, Value, Hasher>::FocusedTree* 410 PersistentMap<Key, Value, Hasher>::FindHash(HashValue hash) const { 411 const FocusedTree* tree = tree_; 412 int level = 0; 413 while (tree && hash != tree->key_hash) { 414 while ((hash ^ tree->key_hash)[level] == 0) { 415 ++level; 416 } 417 tree = level < tree->length ? tree->path(level) : nullptr; 418 ++level; 419 } 420 return tree; 421 } 422 423 template <class Key, class Value, class Hasher> 424 const typename PersistentMap<Key, Value, Hasher>::FocusedTree* 425 PersistentMap<Key, Value, Hasher>::FindHash( 426 HashValue hash, std::array<const FocusedTree*, kHashBits>* path, 427 int* length) const { 428 const FocusedTree* tree = tree_; 429 int level = 0; 430 while (tree && hash != tree->key_hash) { 431 int map_length = tree->length; 432 while ((hash ^ tree->key_hash)[level] == 0) { 433 (*path)[level] = level < map_length ? tree->path(level) : nullptr; 434 ++level; 435 } 436 (*path)[level] = tree; 437 tree = level < tree->length ? tree->path(level) : nullptr; 438 ++level; 439 } 440 if (tree) { 441 while (level < tree->length) { 442 (*path)[level] = tree->path(level); 443 ++level; 444 } 445 } 446 *length = level; 447 return tree; 448 } 449 450 template <class Key, class Value, class Hasher> 451 const Value& PersistentMap<Key, Value, Hasher>::GetFocusedValue( 452 const FocusedTree* tree, const Key& key) const { 453 if (!tree) { 454 return def_value_; 455 } 456 if (tree->more) { 457 auto it = tree->more->find(key); 458 if (it == tree->more->end()) 459 return def_value_; 460 else 461 return it->second; 462 } else { 463 if (key == tree->key_value.key()) { 464 return tree->key_value.value(); 465 } else { 466 return def_value_; 467 } 468 } 469 } 470 471 template <class Key, class Value, class Hasher> 472 const typename PersistentMap<Key, Value, Hasher>::FocusedTree* 473 PersistentMap<Key, Value, Hasher>::GetChild(const FocusedTree* tree, int level, 474 Bit bit) { 475 if (tree->key_hash[level] == bit) { 476 return tree; 477 } else if (level < tree->length) { 478 return tree->path(level); 479 } else { 480 return nullptr; 481 } 482 } 483 484 template <class Key, class Value, class Hasher> 485 const typename PersistentMap<Key, Value, Hasher>::FocusedTree* 486 PersistentMap<Key, Value, Hasher>::FindLeftmost( 487 const FocusedTree* start, int* level, 488 std::array<const FocusedTree*, kHashBits>* path) { 489 const FocusedTree* current = start; 490 while (*level < current->length) { 491 if (const FocusedTree* child = GetChild(current, *level, kLeft)) { 492 (*path)[*level] = GetChild(current, *level, kRight); 493 current = child; 494 ++*level; 495 } else if (const FocusedTree* child = GetChild(current, *level, kRight)) { 496 (*path)[*level] = GetChild(current, *level, kLeft); 497 current = child; 498 ++*level; 499 } else { 500 UNREACHABLE(); 501 } 502 } 503 return current; 504 } 505 506 template <class Key, class Value, class Hasher> 507 std::ostream& operator<<(std::ostream& os, 508 const PersistentMap<Key, Value, Hasher>& map) { 509 os << "{"; 510 bool first = true; 511 for (auto pair : map) { 512 if (!first) os << ", "; 513 first = false; 514 os << pair.first << ": " << pair.second; 515 } 516 return os << "}"; 517 } 518 519 } // namespace compiler 520 } // namespace internal 521 } // namespace v8 522 523 #endif // V8_COMPILER_PERSISTENT_MAP_H_ 524