1 /** 2 * Copyright 2019 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_INDEX_H_ 17 #define MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_INDEX_H_ 18 19 #include <algorithm> 20 #include <atomic> 21 #include <functional> 22 #include <utility> 23 #include <memory> 24 #include <deque> 25 #include "./securec.h" 26 #include "minddata/dataset/util/allocator.h" 27 #include "minddata/dataset/util/list.h" 28 #include "minddata/dataset/util/lock.h" 29 #include "minddata/dataset/util/memory_pool.h" 30 #include "minddata/dataset/util/services.h" 31 #include "minddata/dataset/util/status.h" 32 33 namespace mindspore { 34 namespace dataset { 35 // Default traits for a B+ tree 36 struct BPlusTreeTraits { 37 // This determines the limit of number of keys in a node. 38 using slot_type = uint16_t; 39 // Number of slots in each leaf of the tree. 40 static constexpr slot_type kLeafSlots = 256; 41 // Number of slots in each inner node of the tree 42 static constexpr slot_type kInnerSlots = 128; 43 }; 44 45 /// Implementation of B+ tree 46 /// @tparam K -- the type of key 47 /// @tparam V -- the type of value 48 /// @tparam A -- allocator 49 /// @tparam C -- comparison class 50 /// @tparam T -- trait 51 template <typename K, typename V, typename A = std::allocator<V>, typename C = std::less<K>, 52 typename T = BPlusTreeTraits> 53 class BPlusTree { 54 public: 55 enum class IndexRc : char { 56 kOk = 0, 57 kDuplicateKey = 1, 58 kSlotFull = 2, 59 kKeyNotFound = 3, 60 kNullPointer = 4, 61 kOutOfMemory = 5, 62 kRetry = 6, 63 kUnexpectedError = 127 64 }; 65 #define RETURN_IF_BAD_RC(_s) \ 66 do { \ 67 IndexRc __rc = (_s); \ 68 if (__rc != IndexRc::kOk) { \ 69 return __rc; \ 70 } \ 71 } while (false) 72 IndexRc2Status(IndexRc rc)73 Status IndexRc2Status(IndexRc rc) { 74 if (rc == IndexRc::kOk) { 75 return Status(StatusCode::kSuccess); 76 } else if (rc == IndexRc::kOutOfMemory) { 77 return Status(StatusCode::kMDOutOfMemory); 78 } else if (rc == IndexRc::kDuplicateKey) { 79 return Status(StatusCode::kMDDuplicateKey); 80 } else { 81 RETURN_STATUS_UNEXPECTED(std::to_string(static_cast<int>(rc))); 82 } 83 } 84 85 using key_type = K; 86 using value_type = V; 87 using key_compare = C; 88 using slot_type = typename T::slot_type; 89 using traits = T; 90 using value_allocator = A; 91 using key_allocator = typename value_allocator::template rebind<key_type>::other; 92 using slot_allocator = typename value_allocator::template rebind<slot_type>::other; 93 94 BPlusTree(); 95 96 explicit BPlusTree(const Allocator<V> &alloc); 97 98 ~BPlusTree() noexcept; 99 100 BPlusTree(const BPlusTree &) = delete; 101 102 BPlusTree(BPlusTree &&) = delete; 103 104 BPlusTree &operator=(const BPlusTree &) = delete; 105 106 BPlusTree &operator=(BPlusTree &&) = delete; 107 key_comp()108 key_compare key_comp() const { return key_less_; } 109 size()110 size_t size() const { return stats_.size_; } 111 empty()112 bool empty() const { return (size() == 0); } 113 114 /// @param key 115 /// @param value 116 /// @return 117 Status DoInsert(const key_type &key, const value_type &value); 118 Status DoInsert(const key_type &key, std::unique_ptr<value_type> &&value); 119 120 // Update a new value for a given key. 121 std::unique_ptr<value_type> DoUpdate(const key_type &key, const value_type &new_value); 122 std::unique_ptr<value_type> DoUpdate(const key_type &key, std::unique_ptr<value_type> &&new_value); 123 124 // Statistics 125 struct tree_stats { 126 std::atomic<uint64_t> size_; 127 uint32_t leaves_; 128 uint32_t inner_nodes_; 129 uint32_t level_; 130 tree_statstree_stats131 tree_stats() : size_(0), leaves_(0), inner_nodes_(0), level_(0) {} 132 }; 133 134 /// \brief Statistics functions 135 /// \return Return the height of the tree GetHeight()136 auto GetHeight() const { return empty() ? 0 : stats_.level_ + 1; } 137 /// \return Order of the B+ tree GetOrder()138 auto GetOrder() const { return traits::kLeafSlots; } 139 /// \return Number of leaves nodes GetNumLeaves()140 auto GetNumLeaves() const { return stats_.leaves_; } 141 /// \return Number of inner nodes GetNumInnerNodes()142 auto GetNumInnerNodes() const { return stats_.inner_nodes_; } 143 144 /// \brief Toggle locking 145 /// \note Once locking is off. It is user's responsibility to ensure concurrency SetLocking(bool on_off)146 void SetLocking(bool on_off) { 147 UniqueLock lck(&rw_lock_); 148 acquire_lock_ = on_off; 149 } 150 LockShared()151 void LockShared() { rw_lock_.LockShared(); } 152 LockExclusive()153 void LockExclusive() { rw_lock_.LockExclusive(); } 154 Unlock()155 void Unlock() { rw_lock_.Unlock(); } 156 157 private: 158 // Abstract class of a node (leaf or inner) 159 class BaseNode { 160 public: 161 friend class BPlusTree; 162 163 virtual bool is_leafnode() const = 0; 164 165 virtual bool is_full() const = 0; 166 BaseNode(const value_allocator & alloc)167 explicit BaseNode(const value_allocator &alloc) : alloc_(alloc) {} 168 169 virtual ~BaseNode() = default; 170 171 protected: 172 mutable RWLock rw_lock_; 173 value_allocator alloc_; 174 175 private: 176 Node<BaseNode> lru_; 177 }; 178 179 // This control block keeps track of all the nodes we traverse on insert. 180 // To maximize concurrency, internal nodes are latched S. If a node split 181 // is required, we must releases all the latches and redo it again and change 182 // the latch mode from S to X. 183 struct LockPathCB { 184 enum class LockMode : char { kShared = 0, kExclusive = 1, kNone = 2 }; 185 186 struct path { 187 BaseNode *node_; 188 bool locked_; 189 pathLockPathCB::path190 path() : node_(nullptr), locked_(false) {} 191 pathLockPathCB::path192 path(BaseNode *p, LockMode lockmode) : node_(p), locked_(false) { 193 if (lockmode == LockMode::kExclusive) { 194 p->rw_lock_.LockExclusive(); 195 locked_ = true; 196 } else if (lockmode == LockMode::kShared) { 197 p->rw_lock_.LockShared(); 198 locked_ = true; 199 } 200 } 201 }; 202 LockPathCBLockPathCB203 LockPathCB(BPlusTree *tree, bool retryWithXlock) : self_(tree), latch_shared_(true) { 204 if (retryWithXlock) { 205 latch_shared_ = false; 206 } 207 if (latch_shared_) { 208 tree->rw_lock_.LockShared(); 209 } else { 210 tree->rw_lock_.LockExclusive(); 211 } 212 } 213 ~LockPathCBLockPathCB214 ~LockPathCB() noexcept { 215 // Make sure all locks are released. 216 while (!paths_.empty()) { 217 path p = paths_.back(); 218 paths_.pop_back(); 219 if (p.locked_) { 220 p.node_->rw_lock_.Unlock(); 221 } 222 } 223 self_->rw_lock_.Unlock(); 224 self_ = nullptr; 225 } 226 LockNodeLockPathCB227 void LockNode(BaseNode *p, LockMode locktype) { paths_.emplace_back(p, locktype); } 228 UnlockMyParentsLockPathCB229 void UnlockMyParents(BaseNode *me) { 230 path p = paths_.front(); 231 while (p.node_ != me) { 232 if (p.locked_) { 233 p.node_->rw_lock_.Unlock(); 234 } 235 paths_.pop_front(); 236 p = paths_.front(); 237 } 238 } 239 240 BPlusTree *self_; 241 std::deque<path> paths_; 242 bool latch_shared_; 243 }; 244 245 // Definition of inner node which fans to either inner node or leaf node. 246 class InnerNode : public BaseNode { 247 public: 248 friend class BPlusTree; 249 250 using alloc_type = typename value_allocator::template rebind<InnerNode>::other; 251 is_leafnode()252 bool is_leafnode() const override { return false; } 253 is_full()254 bool is_full() const override { return (slotuse_ == traits::kInnerSlots); } 255 256 IndexRc Sort(); 257 258 // 50/50 split 259 IndexRc Split(InnerNode *to, key_type *split_key); 260 261 IndexRc InsertIntoSlot(slot_type slot, const key_type &key, BaseNode *ptr); 262 InnerNode(const value_allocator & alloc)263 explicit InnerNode(const value_allocator &alloc) : BaseNode::BaseNode(alloc), slotuse_(0) {} 264 265 ~InnerNode() = default; 266 267 slot_type slot_dir_[traits::kInnerSlots] = {0}; 268 key_type keys_[traits::kInnerSlots] = {0}; 269 BaseNode *data_[traits::kInnerSlots + 1] = {nullptr}; 270 slot_type slotuse_; 271 }; 272 273 // Definition of a leaf node which contains the key/value pair 274 class LeafNode : public BaseNode { 275 public: 276 friend class BPlusTree; 277 278 using alloc_type = typename value_allocator::template rebind<LeafNode>::other; 279 Node<LeafNode> link_; 280 is_leafnode()281 bool is_leafnode() const override { return true; } 282 is_full()283 bool is_full() const override { return (slotuse_ == traits::kLeafSlots); } 284 285 IndexRc Sort(); 286 287 // 50/50 split 288 IndexRc Split(LeafNode *to); 289 290 IndexRc InsertIntoSlot(LockPathCB *insCB, slot_type slot, const key_type &key, std::unique_ptr<value_type> &&value); 291 LeafNode(const value_allocator & alloc)292 explicit LeafNode(const value_allocator &alloc) : BaseNode::BaseNode(alloc), slotuse_(0) {} 293 294 ~LeafNode() = default; 295 296 slot_type slot_dir_[traits::kLeafSlots] = {0}; 297 key_type keys_[traits::kLeafSlots] = {0}; 298 std::unique_ptr<value_type> data_[traits::kLeafSlots]; 299 slot_type slotuse_; 300 }; 301 302 mutable RWLock rw_lock_; 303 value_allocator alloc_; 304 // All the leaf nodes. Used by the iterator to traverse all the key/values. 305 List<LeafNode> leaf_nodes_; 306 // All the nodes (inner + leaf). Used by the destructor to free the memory of all the nodes. 307 List<BaseNode> all_; 308 // Pointer to the root of the tree. 309 BaseNode *root_; 310 // Key comparison object 311 key_compare key_less_; 312 // Stat 313 tree_stats stats_; 314 // lock mode 315 bool acquire_lock_; 316 Init()317 void Init() { 318 typename LeafNode::alloc_type alloc(alloc_); 319 LeafNode *p = nullptr; 320 try { 321 p = alloc.allocate(1); 322 } catch (std::bad_alloc &e) { 323 p = nullptr; 324 return; 325 } 326 root_ = new (p) LeafNode(alloc_); 327 all_.Prepend(p); 328 leaf_nodes_.Append(p); 329 stats_.leaves_++; 330 } 331 LessThan(const key_type & a,const key_type & b)332 bool LessThan(const key_type &a, const key_type &b) const { return key_less_(a, b); } 333 EqualOrLessThan(const key_type & a,const key_type & b)334 bool EqualOrLessThan(const key_type &a, const key_type &b) const { return !key_less_(b, a); } 335 Equal(const key_type & a,const key_type & b)336 bool Equal(const key_type &a, const key_type &b) const { return !key_less_(a, b) && !key_less_(b, a); } 337 338 IndexRc AllocateInner(InnerNode **p); 339 340 IndexRc AllocateLeaf(LeafNode **p); 341 342 template <typename node_type> 343 slot_type FindSlot(const node_type *node, const key_type &key, bool *duplicate = nullptr) const { 344 slot_type lo = 0; 345 while (lo < node->slotuse_ && key_comp()(node->keys_[node->slot_dir_[lo]], key)) { 346 ++lo; 347 } 348 bool keymatch = (lo < node->slotuse_ && Equal(key, node->keys_[node->slot_dir_[lo]])); 349 if (keymatch && !node->is_leafnode()) { 350 // For an inner node and we match a key during search, we should look into the next slot. 351 ++lo; 352 } 353 if (duplicate != nullptr) { 354 *duplicate = keymatch; 355 } 356 return lo; 357 } 358 359 IndexRc LeafInsertKeyValue(LockPathCB *ins_cb, LeafNode *node, const key_type &key, 360 std::unique_ptr<value_type> &&value, key_type *split_key, LeafNode **split_node); 361 362 IndexRc InnerInsertKeyChild(InnerNode *node, const key_type &key, BaseNode *ptr, key_type *split_key, 363 InnerNode **split_node); 364 FindBranch(InnerNode * inner,slot_type slot)365 inline BaseNode *FindBranch(InnerNode *inner, slot_type slot) const { 366 BaseNode *child = nullptr; 367 if (slot == 0) { 368 child = inner->data_[0]; 369 } else { 370 child = inner->data_[inner->slot_dir_[slot - 1] + 1]; 371 } 372 return child; 373 } 374 375 IndexRc InsertKeyValue(LockPathCB *ins_cb, BaseNode *n, const key_type &key, std::unique_ptr<value_type> &&value, 376 key_type *split_key, BaseNode **split_node); 377 378 IndexRc Locate(RWLock *parent_lock, bool forUpdate, BaseNode *top, const key_type &key, LeafNode **ln, 379 slot_type *s) const; 380 381 public: 382 class Iterator : public std::iterator<std::bidirectional_iterator_tag, value_type> { 383 public: 384 using reference = BPlusTree::value_type &; 385 using pointer = BPlusTree::value_type *; 386 Iterator(BPlusTree * btree)387 explicit Iterator(BPlusTree *btree) : cur_(btree->leaf_nodes_.head), slot_(0), locked_(false) {} 388 cur_(leaf)389 Iterator(LeafNode *leaf, slot_type slot, bool locked = false) : cur_(leaf), slot_(slot), locked_(locked) {} 390 391 ~Iterator(); 392 393 explicit Iterator(const Iterator &); 394 395 Iterator &operator=(const Iterator &lhs); 396 397 explicit Iterator(Iterator &&) noexcept; 398 399 Iterator &operator=(Iterator &&lhs); 400 401 pointer operator->() const { return cur_->data_[cur_->slot_dir_[slot_]].get(); } 402 403 reference operator*() const { return *(cur_->data_[cur_->slot_dir_[slot_]].get()); } 404 key()405 const key_type &key() const { return cur_->keys_[cur_->slot_dir_[slot_]]; } 406 value()407 value_type &value() const { return *(cur_->data_[cur_->slot_dir_[slot_]].get()); } 408 409 // Prefix++ 410 Iterator &operator++(); 411 412 // Postfix++ 413 Iterator operator++(int); 414 415 // Prefix-- 416 Iterator &operator--(); 417 418 // Postfix-- 419 Iterator operator--(int); 420 421 bool operator==(const Iterator &x) const { return (x.cur_ == cur_) && (x.slot_ == slot_); } 422 bool operator!=(const Iterator &x) const { return (x.cur_ != cur_) || (x.slot_ != slot_); } 423 LockShared()424 void LockShared() { 425 cur_->rw_lock_.LockShared(); 426 locked_ = true; 427 } 428 LockExclusive()429 void LockExclusive() { 430 cur_->rw_lock_.LockExclusive(); 431 locked_ = true; 432 } 433 Unlock()434 void Unlock() { 435 cur_->rw_lock_.Unlock(); 436 locked_ = false; 437 } 438 439 private: 440 typename BPlusTree::LeafNode *cur_; 441 slot_type slot_; 442 bool locked_; 443 }; 444 445 class ConstIterator : public std::iterator<std::bidirectional_iterator_tag, value_type> { 446 public: 447 using reference = BPlusTree::value_type &; 448 using pointer = BPlusTree::value_type *; 449 ConstIterator(const BPlusTree * btree)450 explicit ConstIterator(const BPlusTree *btree) : cur_(btree->leaf_nodes_.head), slot_(0), locked_(false) {} 451 452 ~ConstIterator(); 453 454 ConstIterator(const LeafNode *leaf, slot_type slot, bool locked = false) cur_(leaf)455 : cur_(leaf), slot_(slot), locked_(locked) {} 456 457 explicit ConstIterator(const ConstIterator &); 458 459 ConstIterator &operator=(const ConstIterator &lhs); 460 461 explicit ConstIterator(ConstIterator &&) noexcept; 462 463 ConstIterator &operator=(ConstIterator &&lhs); 464 465 pointer operator->() const { return cur_->data_[cur_->slot_dir_[slot_]].get(); } 466 467 reference operator*() const { return *(cur_->data_[cur_->slot_dir_[slot_]].get()); } 468 key()469 const key_type &key() const { return cur_->keys_[cur_->slot_dir_[slot_]]; } 470 value()471 value_type &value() const { return *(cur_->data_[cur_->slot_dir_[slot_]].get()); } 472 473 // Prefix++ 474 ConstIterator &operator++(); 475 476 // Postfix++ 477 ConstIterator operator++(int); 478 479 // Prefix-- 480 ConstIterator &operator--(); 481 482 // Postfix-- 483 ConstIterator operator--(int); 484 485 bool operator==(const ConstIterator &x) const { return (x.cur_ == cur_) && (x.slot_ == slot_); } 486 bool operator!=(const ConstIterator &x) const { return (x.cur_ != cur_) || (x.slot_ != slot_); } 487 LockShared()488 void LockShared() { 489 cur_->rw_lock_.LockShared(); 490 locked_ = true; 491 } 492 LockExclusive()493 void LockExclusive() { 494 cur_->rw_lock_.LockExclusive(); 495 locked_ = true; 496 } 497 Unlock()498 void Unlock() { 499 cur_->rw_lock_.Unlock(); 500 locked_ = false; 501 } 502 503 private: 504 const typename BPlusTree::LeafNode *cur_; 505 slot_type slot_; 506 bool locked_; 507 }; 508 509 Iterator begin(); 510 Iterator end(); 511 512 ConstIterator begin() const; 513 ConstIterator end() const; 514 515 ConstIterator cbegin() const; 516 ConstIterator cend() const; 517 518 // Locate the entry with key 519 std::pair<ConstIterator, bool> Search(const key_type &key) const; 520 std::pair<Iterator, bool> Search(const key_type &key); 521 522 value_type operator[](key_type key); 523 }; 524 } // namespace dataset 525 } // namespace mindspore 526 #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_INDEX_H_ 527 528 #include "btree_impl.tpp" 529 #include "btree_iterator.tpp" 530