1/* Copyright 2019 Huawei Technologies Co., Ltd.All Rights Reserved. 2 * 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15#ifndef DATASET_UTIL_BTREE_H_ 16#define DATASET_UTIL_BTREE_H_ 17 18#include "btree.h" 19 20namespace mindspore { 21namespace dataset { 22template <typename K, typename V, typename A, typename C, typename T> 23typename BPlusTree<K, V, A, C, T>::IndexRc BPlusTree<K, V, A, C, T>::InnerNode::Sort() { 24 // Build an inverse map. Basically it means keys[i] should be relocated to keys[inverse[i]]; 25 slot_allocator alloc(this->alloc_); 26 try { 27 // We use a unique_ptr will custom deleter to ensure the memory will be released when this 28 // function returns. 29 std::unique_ptr<slot_type[], std::function<void(slot_type *)>> memGuard( 30 alloc.allocate(traits::kInnerSlots), [&alloc](slot_type *p) { alloc.deallocate(p, traits::kInnerSlots); }); 31 slot_type *inverse = memGuard.get(); 32 for (slot_type i = 0; i < slotuse_; i++) { 33 inverse[slot_dir_[i]] = i; 34 } 35 for (slot_type i = 0; i < slotuse_; i++) { 36 while (inverse[i] != i) { 37 slot_type j = inverse[i]; 38 slot_type k = inverse[j]; 39 // Swap the key 40 std::swap(keys_[j], keys_[i]); 41 // Swap the pointers. 42 if ((j + 1) >= traits::kInnerSlots + 1 || (i + 1) >= traits::kInnerSlots + 1) { 43 return IndexRc::kUnexpectedError; 44 } 45 std::swap(data_[j + 1], data_[i + 1]); 46 // one key in order. 47 inverse[j] = j; 48 // continue to move 49 inverse[i] = k; 50 } 51 slot_dir_[i] = i; 52 } 53 return IndexRc::kOk; 54 } catch (std::bad_alloc &e) { 55 return IndexRc::kOutOfMemory; 56 } catch (std::exception &e) { 57 return IndexRc::kUnexpectedError; 58 } 59} 60 61template <typename K, typename V, typename A, typename C, typename T> 62typename BPlusTree<K, V, A, C, T>::IndexRc BPlusTree<K, V, A, C, T>::InnerNode::Split( 63 BPlusTree<K, V, A, C, T>::InnerNode *to, key_type *split_key) { 64 MS_ASSERT(to); 65 MS_ASSERT(to->slotuse_ == 0); 66 // It is simpler to sort first, then split. Other alternative is to move key by key to the 67 // new node. Also we need to deal with the 'holes' after a key is moved. 68 RETURN_IF_BAD_RC(this->Sort()); 69 slot_type mid = slotuse_ >> 1; 70 slot_type num_keys_to_move = slotuse_ - (mid + 1); 71 *split_key = keys_[mid]; 72 errno_t err = memmove_s(to->keys_, sizeof(to->keys_), keys_ + mid + 1, num_keys_to_move * sizeof(key_type)); 73 if (err != EOK) { 74 return IndexRc::kUnexpectedError; 75 } 76 err = memcpy_s(to->data_, sizeof(to->data_), data_ + mid + 1, (num_keys_to_move + 1) * sizeof(BaseNode *)); 77 if (err != EOK) { 78 return IndexRc::kUnexpectedError; 79 } 80 for (slot_type i = 0; i < num_keys_to_move; i++) { 81 to->slot_dir_[i] = i; 82 } 83 slotuse_ -= (num_keys_to_move + 1); // the split key is moved up. So one less 84 to->slotuse_ += num_keys_to_move; 85 return IndexRc::kOk; 86} 87 88template <typename K, typename V, typename A, typename C, typename T> 89typename BPlusTree<K, V, A, C, T>::IndexRc BPlusTree<K, V, A, C, T>::InnerNode::InsertIntoSlot( 90 slot_type slot, const key_type &key, BPlusTree<K, V, A, C, T>::BaseNode *ptr) { 91 if (is_full()) { 92 return IndexRc::kSlotFull; 93 } 94 // Shift the slot entries to the right and make room for the new comer. 95 // We don't sort the key and/or the data array until node split 96 auto num_keys_to_move = slotuse_ - slot; 97 if (num_keys_to_move > 0) { 98 auto *src = &slot_dir_[slot]; 99 auto *dest = &slot_dir_[slot + 1]; 100 auto destMax = sizeof(slot_dir_) - sizeof(slot_type) * (slot + 1); 101 auto amt = sizeof(slot_type) * num_keys_to_move; 102 errno_t err = memmove_s(dest, destMax, src, amt); 103 if (err != EOK) { 104 return IndexRc::kUnexpectedError; 105 } 106 } 107 slot_dir_[slot] = slotuse_; 108 keys_[slotuse_] = key; 109 data_[slotuse_ + 1] = ptr; 110 ++slotuse_; 111 return IndexRc::kOk; 112} 113 114template <typename K, typename V, typename A, typename C, typename T> 115typename BPlusTree<K, V, A, C, T>::IndexRc BPlusTree<K, V, A, C, T>::LeafNode::Sort() { 116 // Build an inverse map. Basically it means keys[i] should be relocated to keys[inverse[i]]; 117 slot_allocator alloc(this->alloc_); 118 try { 119 // We use a unique_ptr will custom deleter to ensure the memory will be released when this 120 // function returns. 121 std::unique_ptr<slot_type[], std::function<void(slot_type *)>> memGuard( 122 alloc.allocate(traits::kLeafSlots), [&alloc](slot_type *p) { alloc.deallocate(p, traits::kLeafSlots); }); 123 slot_type *inverse = memGuard.get(); 124 for (slot_type i = 0; i < slotuse_; i++) { 125 inverse[slot_dir_[i]] = i; 126 } 127 for (slot_type i = 0; i < slotuse_; i++) { 128 while (inverse[i] != i) { 129 slot_type j = inverse[i]; 130 slot_type k = inverse[j]; 131 // Swap the key 132 if (j >= traits::kLeafSlots || i >= traits::kLeafSlots) { 133 return IndexRc::kUnexpectedError; 134 } 135 std::swap(keys_[j], keys_[i]); 136 // Swap the shared pointers 137 std::swap(data_[j], data_[i]); 138 // one key in order. 139 inverse[j] = j; 140 // continue to move 141 inverse[i] = k; 142 } 143 slot_dir_[i] = i; 144 } 145 return IndexRc::kOk; 146 } catch (std::bad_alloc &e) { 147 return IndexRc::kOutOfMemory; 148 } catch (std::exception &e) { 149 return IndexRc::kUnexpectedError; 150 } 151} 152 153template <typename K, typename V, typename A, typename C, typename T> 154typename BPlusTree<K, V, A, C, T>::IndexRc BPlusTree<K, V, A, C, T>::LeafNode::Split( 155 BPlusTree<K, V, A, C, T>::LeafNode *to) { 156 MS_ASSERT(to); 157 MS_ASSERT(to->slotuse_ == 0); 158 // It is simpler to sort first, then split. Other alternative is to move key by key to the 159 // new node. Also we need to deal with the 'holes' after a key is moved. 160 RETURN_IF_BAD_RC(this->Sort()); 161 slot_type mid = slotuse_ >> 1; 162 slot_type num_keys_to_move = slotuse_ - mid; 163 errno_t err = memmove_s(to->keys_, sizeof(to->keys_), keys_ + mid, num_keys_to_move * sizeof(key_type)); 164 if (err != EOK) { 165 return IndexRc::kUnexpectedError; 166 } 167 for (slot_type i = 0; i < num_keys_to_move; i++) { 168 to->data_[i] = std::move(data_[i + mid]); 169 to->slot_dir_[i] = i; 170 } 171 slotuse_ -= num_keys_to_move; 172 to->slotuse_ += num_keys_to_move; 173 return IndexRc::kOk; 174} 175 176template <typename K, typename V, typename A, typename C, typename T> 177typename BPlusTree<K, V, A, C, T>::IndexRc BPlusTree<K, V, A, C, T>::LeafNode::InsertIntoSlot( 178 BPlusTree<K, V, A, C, T>::LockPathCB *insCB, slot_type slot, const key_type &key, 179 std::unique_ptr<value_type> &&value) { 180 if (is_full()) { 181 // If we need to do node split, we need to ensure all the intermediate nodes are locked exclusive. 182 // Otherwise we need to do a retry. 183 if (insCB == nullptr || !insCB->latch_shared_) { 184 return IndexRc::kSlotFull; 185 } else { 186 return IndexRc::kRetry; 187 } 188 } 189 // We can now let go all the locks of the parent. Nothing we do from now on will change the 190 // structure of the tree. 191 if (insCB) { 192 insCB->UnlockMyParents(this); 193 } 194 // Shift the slot entries to the right and make room for the new comer. 195 // We don't sort the key and/or the data array until node split 196 auto num_keys_to_move = slotuse_ - slot; 197 if (num_keys_to_move > 0) { 198 auto *src = &slot_dir_[slot]; 199 auto *dest = &slot_dir_[slot + 1]; 200 auto destMax = sizeof(slot_dir_) - sizeof(slot_type) * (slot + 1); 201 auto amt = sizeof(slot_type) * num_keys_to_move; 202 errno_t err = memmove_s(dest, destMax, src, amt); 203 if (err != EOK) { 204 return IndexRc::kUnexpectedError; 205 } 206 } 207 slot_dir_[slot] = slotuse_; 208 keys_[slotuse_] = key; 209 data_[slotuse_] = std::move(value); 210 ++slotuse_; 211 return IndexRc::kOk; 212} 213 214template <typename K, typename V, typename A, typename C, typename T> 215typename BPlusTree<K, V, A, C, T>::IndexRc BPlusTree<K, V, A, C, T>::AllocateInner( 216 BPlusTree<K, V, A, C, T>::InnerNode **p) { 217 if (p == nullptr) { 218 return IndexRc::kNullPointer; 219 } 220 typename InnerNode::alloc_type alloc(alloc_); 221 InnerNode *ptr = nullptr; 222 try { 223 ptr = alloc.allocate(1); 224 } catch (std::bad_alloc &e) { 225 return IndexRc::kOutOfMemory; 226 } catch (std::exception &e) { 227 return IndexRc::kUnexpectedError; 228 } 229 *p = new (ptr) InnerNode(alloc_); 230 all_.Prepend(ptr); 231 stats_.inner_nodes_++; 232 return IndexRc::kOk; 233} 234 235template <typename K, typename V, typename A, typename C, typename T> 236typename BPlusTree<K, V, A, C, T>::IndexRc BPlusTree<K, V, A, C, T>::AllocateLeaf( 237 BPlusTree<K, V, A, C, T>::LeafNode **p) { 238 if (p == nullptr) { 239 return IndexRc::kNullPointer; 240 } 241 typename LeafNode::alloc_type alloc(this->alloc_); 242 LeafNode *ptr = nullptr; 243 try { 244 ptr = alloc.allocate(1); 245 } catch (std::bad_alloc &e) { 246 return IndexRc::kOutOfMemory; 247 } catch (std::exception &e) { 248 return IndexRc::kUnexpectedError; 249 } 250 *p = new (ptr) LeafNode(alloc_); 251 all_.Prepend(ptr); 252 stats_.leaves_++; 253 return IndexRc::kOk; 254} 255 256template <typename K, typename V, typename A, typename C, typename T> 257typename BPlusTree<K, V, A, C, T>::IndexRc BPlusTree<K, V, A, C, T>::LeafInsertKeyValue( 258 BPlusTree<K, V, A, C, T>::LockPathCB *ins_cb, BPlusTree<K, V, A, C, T>::LeafNode *node, const key_type &key, 259 std::unique_ptr<value_type> &&value, key_type *split_key, BPlusTree<K, V, A, C, T>::LeafNode **split_node) { 260 bool duplicate; 261 slot_type slot = FindSlot(node, key, &duplicate); 262 if (duplicate) { 263 return IndexRc::kDuplicateKey; 264 } 265 IndexRc rc = node->InsertIntoSlot(ins_cb, slot, key, std::move(value)); 266 if (rc == IndexRc::kSlotFull) { 267 LeafNode *new_leaf = nullptr; 268 rc = AllocateLeaf(&new_leaf); 269 RETURN_IF_BAD_RC(rc); 270 leaf_nodes_.InsertAfter(node, new_leaf); 271 *split_node = new_leaf; 272 // 50/50 split 273 rc = node->Split(new_leaf); 274 RETURN_IF_BAD_RC(rc); 275 *split_key = new_leaf->keys_[0]; 276 if (LessThan(key, *split_key)) { 277 rc = node->InsertIntoSlot(nullptr, slot, key, std::move(value)); 278 RETURN_IF_BAD_RC(rc); 279 } else { 280 slot -= node->slotuse_; 281 rc = new_leaf->InsertIntoSlot(nullptr, slot, key, std::move(value)); 282 RETURN_IF_BAD_RC(rc); 283 } 284 } 285 return rc; 286} 287 288template <typename K, typename V, typename A, typename C, typename T> 289typename BPlusTree<K, V, A, C, T>::IndexRc BPlusTree<K, V, A, C, T>::InnerInsertKeyChild( 290 BPlusTree<K, V, A, C, T>::InnerNode *node, const key_type &key, BPlusTree<K, V, A, C, T>::BaseNode *ptr, 291 key_type *split_key, BPlusTree<K, V, A, C, T>::InnerNode **split_node) { 292 bool duplicate; 293 slot_type slot = FindSlot(node, key, &duplicate); 294 if (duplicate) { 295 return IndexRc::kDuplicateKey; 296 } 297 IndexRc rc = node->InsertIntoSlot(slot, key, ptr); 298 if (rc == IndexRc::kSlotFull) { 299 InnerNode *new_inner = nullptr; 300 rc = AllocateInner(&new_inner); 301 RETURN_IF_BAD_RC(rc); 302 *split_node = new_inner; 303 rc = node->Split(new_inner, split_key); 304 RETURN_IF_BAD_RC(rc); 305 if (LessThan(key, *split_key)) { 306 // Need to readjust the slot position since the split key is no longer in the two children. 307 slot = FindSlot(node, key); 308 rc = node->InsertIntoSlot(slot, key, ptr); 309 RETURN_IF_BAD_RC(rc); 310 } else { 311 // Same reasoning as above 312 slot = FindSlot(new_inner, key); 313 rc = new_inner->InsertIntoSlot(slot, key, ptr); 314 RETURN_IF_BAD_RC(rc); 315 } 316 } 317 return rc; 318} 319 320template <typename K, typename V, typename A, typename C, typename T> 321typename BPlusTree<K, V, A, C, T>::IndexRc BPlusTree<K, V, A, C, T>::InsertKeyValue( 322 BPlusTree<K, V, A, C, T>::LockPathCB *ins_cb, BPlusTree<K, V, A, C, T>::BaseNode *n, const key_type &key, 323 std::unique_ptr<value_type> &&value, key_type *split_key, BPlusTree<K, V, A, C, T>::BaseNode **split_node) { 324 if (split_key == nullptr || split_node == nullptr) { 325 return IndexRc::kUnexpectedError; 326 } 327 if (n->is_leafnode()) { 328 if (ins_cb) { 329 // Always lock the leaf in X. 330 ins_cb->LockNode(n, LockPathCB::LockMode::kExclusive); 331 } 332 auto *leaf = static_cast<LeafNode *>(n); 333 LeafNode *new_leaf = nullptr; 334 RETURN_IF_BAD_RC(LeafInsertKeyValue(ins_cb, leaf, key, std::move(value), split_key, &new_leaf)); 335 if (new_leaf) { 336 *split_node = new_leaf; 337 } 338 } else { 339 if (ins_cb) { 340 // For internal node, lock in S unless we are doing retry. 341 if (ins_cb->latch_shared_) { 342 ins_cb->LockNode(n, LockPathCB::LockMode::kShared); 343 } else { 344 ins_cb->LockNode(n, LockPathCB::LockMode::kExclusive); 345 } 346 } 347 auto *inner = static_cast<InnerNode *>(n); 348 slot_type slot = FindSlot(inner, key); 349 BaseNode *new_child = nullptr; 350 key_type new_key = key_type(); 351 RETURN_IF_BAD_RC(InsertKeyValue(ins_cb, FindBranch(inner, slot), key, std::move(value), &new_key, &new_child)); 352 if (new_child) { 353 InnerNode *new_inner = nullptr; 354 RETURN_IF_BAD_RC(InnerInsertKeyChild(inner, new_key, new_child, split_key, &new_inner)); 355 if (new_inner) { 356 *split_node = new_inner; 357 } 358 } 359 } 360 return IndexRc::kOk; 361} 362 363template <typename K, typename V, typename A, typename C, typename T> 364typename BPlusTree<K, V, A, C, T>::IndexRc BPlusTree<K, V, A, C, T>::Locate(RWLock *parent_lock, bool forUpdate, 365 BPlusTree<K, V, A, C, T>::BaseNode *top, 366 const key_type &key, 367 BPlusTree<K, V, A, C, T>::LeafNode **ln, 368 slot_type *s) const { 369 if (ln == nullptr || s == nullptr) { 370 return IndexRc::kNullPointer; 371 } 372 if (top == nullptr) { 373 return IndexRc::kKeyNotFound; 374 } 375 RWLock *myLock = nullptr; 376 if (parent_lock != nullptr) { 377 // Crabbing. Lock this node first, then unlock the parent. 378 myLock = &top->rw_lock_; 379 if (top->is_leafnode()) { 380 if (forUpdate) { 381 // We are holding the parent lock in S and try to lock this node with X. It is not possible to run 382 // into deadlock because no one will hold the child in X and trying to lock the parent in that order. 383 myLock->LockExclusive(); 384 } else { 385 myLock->LockShared(); 386 } 387 } else { 388 myLock->LockShared(); 389 } 390 parent_lock->Unlock(); 391 } 392 if (top->is_leafnode()) { 393 bool duplicate; 394 auto *leaf = static_cast<LeafNode *>(top); 395 slot_type slot = FindSlot(leaf, key, &duplicate); 396 // Need exact match. 397 if (duplicate) { 398 *ln = leaf; 399 *s = slot; 400 } else { 401 if (myLock != nullptr) { 402 myLock->Unlock(); 403 } 404 return IndexRc::kKeyNotFound; 405 } 406 } else { 407 auto *inner = static_cast<InnerNode *>(top); 408 slot_type slot = FindSlot(inner, key); 409 return Locate(myLock, forUpdate, FindBranch(inner, slot), key, ln, s); 410 } 411 // We still have a S lock on the leaf node. Leave it there. The iterator will unlock it for us. 412 return IndexRc::kOk; 413} 414 415template <typename K, typename V, typename A, typename C, typename T> 416BPlusTree<K, V, A, C, T>::BPlusTree() 417 : leaf_nodes_(&LeafNode::link_), all_(&BaseNode::lru_), root_(nullptr), acquire_lock_(true) { 418 Init(); 419} 420 421template <typename K, typename V, typename A, typename C, typename T> 422BPlusTree<K, V, A, C, T>::BPlusTree(const Allocator<V> &alloc) 423 : alloc_(alloc), leaf_nodes_(&LeafNode::link_), all_(&BaseNode::lru_), root_(nullptr), acquire_lock_(true) { 424 Init(); 425} 426 427template <typename K, typename V, typename A, typename C, typename T> 428BPlusTree<K, V, A, C, T>::~BPlusTree() noexcept { 429 // We have a list of all the nodes allocated. Traverse them and free all the memory 430 BaseNode *n = all_.head; 431 BaseNode *t = nullptr; 432 while (n) { 433 t = n->lru_.next; 434 all_.Remove(n); 435 if (n->is_leafnode()) { 436 auto *leaf = static_cast<LeafNode *>(n); 437 typename LeafNode::alloc_type alloc(alloc_); 438 leaf->~LeafNode(); 439 alloc.deallocate(leaf, 1); 440 } else { 441 auto *in = static_cast<InnerNode *>(n); 442 typename InnerNode::alloc_type alloc(alloc_); 443 in->~InnerNode(); 444 alloc.deallocate(in, 1); 445 } 446 n = t; 447 } 448 root_ = nullptr; 449} 450 451template <typename K, typename V, typename A, typename C, typename T> 452Status BPlusTree<K, V, A, C, T>::DoInsert(const key_type &key, std::unique_ptr<value_type> &&value) { 453 IndexRc rc; 454 bool retry = false; 455 do { 456 // Track all the paths to the target and lock each internal node in S. 457 LockPathCB InsCB(this, retry); 458 // Initially we lock path in S unless we need to do node split. 459 retry = false; 460 BaseNode *new_child = nullptr; 461 key_type new_key = key_type(); 462 rc = InsertKeyValue(acquire_lock_ ? &InsCB : nullptr, root_, key, std::move(value), &new_key, &new_child); 463 if (rc == IndexRc::kRetry) { 464 retry = true; 465 } else if (rc != IndexRc::kOk) { 466 return IndexRc2Status(rc); 467 } else if (new_child != nullptr) { 468 // root is full 469 InnerNode *new_root = nullptr; 470 rc = AllocateInner(&new_root); 471 if (rc == IndexRc::kOk) { 472 rc = new_root->InsertIntoSlot(0, new_key, new_child); 473 if (rc != IndexRc::kOk) { 474 return IndexRc2Status(rc); 475 } 476 new_root->data_[0] = root_; 477 root_ = new_root; 478 stats_.level_++; 479 } else { 480 return IndexRc2Status(rc); 481 } 482 } 483 } while (retry); 484 (void)stats_.size_++; 485 return Status::OK(); 486} 487 488template <typename K, typename V, typename A, typename C, typename T> 489Status BPlusTree<K, V, A, C, T>::DoInsert(const key_type &key, const value_type &value) { 490 // We don't store the value directly into the leaf node as it is expensive to move it during node split. 491 // Rather we store a pointer instead. 492 return DoInsert(key, std::make_unique<value_type>(value)); 493} 494 495template <typename K, typename V, typename A, typename C, typename T> 496std::unique_ptr<V> BPlusTree<K, V, A, C, T>::DoUpdate(const key_type &key, const value_type &new_value) { 497 return DoUpdate(key, std::make_unique<value_type>(new_value)); 498} 499 500template <typename K, typename V, typename A, typename C, typename T> 501std::unique_ptr<V> BPlusTree<K, V, A, C, T>::DoUpdate(const key_type &key, std::unique_ptr<value_type> &&new_value) { 502 if (root_ != nullptr) { 503 LeafNode *leaf = nullptr; 504 slot_type slot; 505 RWLock *myLock = nullptr; 506 if (acquire_lock_) { 507 myLock = &this->rw_lock_; 508 // Lock the tree in S, pass the lock to Locate which will unlock it for us underneath. 509 myLock->LockShared(); 510 } 511 IndexRc rc = Locate(myLock, true, root_, key, &leaf, &slot); 512 if (rc == IndexRc::kOk) { 513 // All locks from the tree to the parent of leaf are all gone. We still have a X lock 514 // on the leaf. 515 // Swap out the old value and replace it with new value. 516 std::unique_ptr<value_type> old = std::move(leaf->data_[leaf->slot_dir_[slot]]); 517 leaf->data_[leaf->slot_dir_[slot]] = std::move(new_value); 518 if (acquire_lock_) { 519 leaf->rw_lock_.Unlock(); 520 } 521 return old; 522 } else { 523 MS_LOG(DEBUG) << "Key not found. rc = " << static_cast<int>(rc) << "."; 524 return nullptr; 525 } 526 } else { 527 return nullptr; 528 } 529} 530 531} // namespace dataset 532} // namespace mindspore 533#endif 534