• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2019 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_INDEX_H_
17 #define MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_INDEX_H_
18 
19 #include <algorithm>
20 #include <atomic>
21 #include <functional>
22 #include <utility>
23 #include <memory>
24 #include <deque>
25 #include "./securec.h"
26 #include "minddata/dataset/util/allocator.h"
27 #include "minddata/dataset/util/list.h"
28 #include "minddata/dataset/util/lock.h"
29 #include "minddata/dataset/util/memory_pool.h"
30 #include "minddata/dataset/util/services.h"
31 #include "minddata/dataset/util/status.h"
32 
33 namespace mindspore {
34 namespace dataset {
35 // Default traits for a B+ tree
36 struct BPlusTreeTraits {
37   // This determines the limit of number of keys in a node.
38   using slot_type = uint16_t;
39   // Number of slots in each leaf of the tree.
40   static constexpr slot_type kLeafSlots = 256;
41   // Number of slots in each inner node of the tree
42   static constexpr slot_type kInnerSlots = 128;
43 };
44 
45 /// Implementation of B+ tree
46 /// @tparam K -- the type of key
47 /// @tparam V -- the type of value
48 /// @tparam A -- allocator
49 /// @tparam C -- comparison class
50 /// @tparam T -- trait
51 template <typename K, typename V, typename A = std::allocator<V>, typename C = std::less<K>,
52           typename T = BPlusTreeTraits>
53 class BPlusTree {
54  public:
55   enum class IndexRc : char {
56     kOk = 0,
57     kDuplicateKey = 1,
58     kSlotFull = 2,
59     kKeyNotFound = 3,
60     kNullPointer = 4,
61     kOutOfMemory = 5,
62     kRetry = 6,
63     kUnexpectedError = 127
64   };
65 #define RETURN_IF_BAD_RC(_s)    \
66   do {                          \
67     IndexRc __rc = (_s);        \
68     if (__rc != IndexRc::kOk) { \
69       return __rc;              \
70     }                           \
71   } while (false)
72 
IndexRc2Status(IndexRc rc)73   Status IndexRc2Status(IndexRc rc) {
74     if (rc == IndexRc::kOk) {
75       return Status(StatusCode::kSuccess);
76     } else if (rc == IndexRc::kOutOfMemory) {
77       return Status(StatusCode::kMDOutOfMemory);
78     } else if (rc == IndexRc::kDuplicateKey) {
79       return Status(StatusCode::kMDDuplicateKey);
80     } else {
81       RETURN_STATUS_UNEXPECTED(std::to_string(static_cast<int>(rc)));
82     }
83   }
84 
85   using key_type = K;
86   using value_type = V;
87   using key_compare = C;
88   using slot_type = typename T::slot_type;
89   using traits = T;
90   using value_allocator = A;
91   using key_allocator = typename value_allocator::template rebind<key_type>::other;
92   using slot_allocator = typename value_allocator::template rebind<slot_type>::other;
93 
94   BPlusTree();
95 
96   explicit BPlusTree(const Allocator<V> &alloc);
97 
98   ~BPlusTree() noexcept;
99 
100   BPlusTree(const BPlusTree &) = delete;
101 
102   BPlusTree(BPlusTree &&) = delete;
103 
104   BPlusTree &operator=(const BPlusTree &) = delete;
105 
106   BPlusTree &operator=(BPlusTree &&) = delete;
107 
key_comp()108   key_compare key_comp() const { return key_less_; }
109 
size()110   size_t size() const { return stats_.size_; }
111 
empty()112   bool empty() const { return (size() == 0); }
113 
114   /// @param key
115   /// @param value
116   /// @return
117   Status DoInsert(const key_type &key, const value_type &value);
118   Status DoInsert(const key_type &key, std::unique_ptr<value_type> &&value);
119 
120   // Update a new value for a given key.
121   std::unique_ptr<value_type> DoUpdate(const key_type &key, const value_type &new_value);
122   std::unique_ptr<value_type> DoUpdate(const key_type &key, std::unique_ptr<value_type> &&new_value);
123 
124   // Statistics
125   struct tree_stats {
126     std::atomic<uint64_t> size_;
127     uint32_t leaves_;
128     uint32_t inner_nodes_;
129     uint32_t level_;
130 
tree_statstree_stats131     tree_stats() : size_(0), leaves_(0), inner_nodes_(0), level_(0) {}
132   };
133 
134   /// \brief Statistics functions
135   /// \return Return the height of the tree
GetHeight()136   auto GetHeight() const { return empty() ? 0 : stats_.level_ + 1; }
137   /// \return Order of the B+ tree
GetOrder()138   auto GetOrder() const { return traits::kLeafSlots; }
139   /// \return Number of leaves nodes
GetNumLeaves()140   auto GetNumLeaves() const { return stats_.leaves_; }
141   /// \return Number of inner nodes
GetNumInnerNodes()142   auto GetNumInnerNodes() const { return stats_.inner_nodes_; }
143 
144   /// \brief Toggle locking
145   /// \note Once locking is off. It is user's responsibility to ensure concurrency
SetLocking(bool on_off)146   void SetLocking(bool on_off) {
147     UniqueLock lck(&rw_lock_);
148     acquire_lock_ = on_off;
149   }
150 
LockShared()151   void LockShared() { rw_lock_.LockShared(); }
152 
LockExclusive()153   void LockExclusive() { rw_lock_.LockExclusive(); }
154 
Unlock()155   void Unlock() { rw_lock_.Unlock(); }
156 
157  private:
158   // Abstract class of a node (leaf or inner)
159   class BaseNode {
160    public:
161     friend class BPlusTree;
162 
163     virtual bool is_leafnode() const = 0;
164 
165     virtual bool is_full() const = 0;
166 
BaseNode(const value_allocator & alloc)167     explicit BaseNode(const value_allocator &alloc) : alloc_(alloc) {}
168 
169     virtual ~BaseNode() = default;
170 
171    protected:
172     mutable RWLock rw_lock_;
173     value_allocator alloc_;
174 
175    private:
176     Node<BaseNode> lru_;
177   };
178 
179   // This control block keeps track of all the nodes we traverse on insert.
180   // To maximize concurrency, internal nodes are latched S. If a node split
181   // is required, we must releases all the latches and redo it again and change
182   // the latch mode from S to X.
183   struct LockPathCB {
184     enum class LockMode : char { kShared = 0, kExclusive = 1, kNone = 2 };
185 
186     struct path {
187       BaseNode *node_;
188       bool locked_;
189 
pathLockPathCB::path190       path() : node_(nullptr), locked_(false) {}
191 
pathLockPathCB::path192       path(BaseNode *p, LockMode lockmode) : node_(p), locked_(false) {
193         if (lockmode == LockMode::kExclusive) {
194           p->rw_lock_.LockExclusive();
195           locked_ = true;
196         } else if (lockmode == LockMode::kShared) {
197           p->rw_lock_.LockShared();
198           locked_ = true;
199         }
200       }
201     };
202 
LockPathCBLockPathCB203     LockPathCB(BPlusTree *tree, bool retryWithXlock) : self_(tree), latch_shared_(true) {
204       if (retryWithXlock) {
205         latch_shared_ = false;
206       }
207       if (latch_shared_) {
208         tree->rw_lock_.LockShared();
209       } else {
210         tree->rw_lock_.LockExclusive();
211       }
212     }
213 
~LockPathCBLockPathCB214     ~LockPathCB() noexcept {
215       // Make sure all locks are released.
216       while (!paths_.empty()) {
217         path p = paths_.back();
218         paths_.pop_back();
219         if (p.locked_) {
220           p.node_->rw_lock_.Unlock();
221         }
222       }
223       self_->rw_lock_.Unlock();
224       self_ = nullptr;
225     }
226 
LockNodeLockPathCB227     void LockNode(BaseNode *p, LockMode locktype) { paths_.emplace_back(p, locktype); }
228 
UnlockMyParentsLockPathCB229     void UnlockMyParents(BaseNode *me) {
230       path p = paths_.front();
231       while (p.node_ != me) {
232         if (p.locked_) {
233           p.node_->rw_lock_.Unlock();
234         }
235         paths_.pop_front();
236         p = paths_.front();
237       }
238     }
239 
240     BPlusTree *self_;
241     std::deque<path> paths_;
242     bool latch_shared_;
243   };
244 
245   // Definition of inner node which fans to either inner node or leaf node.
246   class InnerNode : public BaseNode {
247    public:
248     friend class BPlusTree;
249 
250     using alloc_type = typename value_allocator::template rebind<InnerNode>::other;
251 
is_leafnode()252     bool is_leafnode() const override { return false; }
253 
is_full()254     bool is_full() const override { return (slotuse_ == traits::kInnerSlots); }
255 
256     IndexRc Sort();
257 
258     // 50/50 split
259     IndexRc Split(InnerNode *to, key_type *split_key);
260 
261     IndexRc InsertIntoSlot(slot_type slot, const key_type &key, BaseNode *ptr);
262 
InnerNode(const value_allocator & alloc)263     explicit InnerNode(const value_allocator &alloc) : BaseNode::BaseNode(alloc), slotuse_(0) {}
264 
265     ~InnerNode() = default;
266 
267     slot_type slot_dir_[traits::kInnerSlots] = {0};
268     key_type keys_[traits::kInnerSlots] = {0};
269     BaseNode *data_[traits::kInnerSlots + 1] = {nullptr};
270     slot_type slotuse_;
271   };
272 
273   // Definition of a leaf node which contains the key/value pair
274   class LeafNode : public BaseNode {
275    public:
276     friend class BPlusTree;
277 
278     using alloc_type = typename value_allocator::template rebind<LeafNode>::other;
279     Node<LeafNode> link_;
280 
is_leafnode()281     bool is_leafnode() const override { return true; }
282 
is_full()283     bool is_full() const override { return (slotuse_ == traits::kLeafSlots); }
284 
285     IndexRc Sort();
286 
287     // 50/50 split
288     IndexRc Split(LeafNode *to);
289 
290     IndexRc InsertIntoSlot(LockPathCB *insCB, slot_type slot, const key_type &key, std::unique_ptr<value_type> &&value);
291 
LeafNode(const value_allocator & alloc)292     explicit LeafNode(const value_allocator &alloc) : BaseNode::BaseNode(alloc), slotuse_(0) {}
293 
294     ~LeafNode() = default;
295 
296     slot_type slot_dir_[traits::kLeafSlots] = {0};
297     key_type keys_[traits::kLeafSlots] = {0};
298     std::unique_ptr<value_type> data_[traits::kLeafSlots];
299     slot_type slotuse_;
300   };
301 
302   mutable RWLock rw_lock_;
303   value_allocator alloc_;
304   // All the leaf nodes. Used by the iterator to traverse all the key/values.
305   List<LeafNode> leaf_nodes_;
306   // All the nodes (inner + leaf). Used by the destructor to free the memory of all the nodes.
307   List<BaseNode> all_;
308   // Pointer to the root of the tree.
309   BaseNode *root_;
310   // Key comparison object
311   key_compare key_less_;
312   // Stat
313   tree_stats stats_;
314   // lock mode
315   bool acquire_lock_;
316 
Init()317   void Init() {
318     typename LeafNode::alloc_type alloc(alloc_);
319     LeafNode *p = nullptr;
320     try {
321       p = alloc.allocate(1);
322     } catch (std::bad_alloc &e) {
323       p = nullptr;
324       return;
325     }
326     root_ = new (p) LeafNode(alloc_);
327     all_.Prepend(p);
328     leaf_nodes_.Append(p);
329     stats_.leaves_++;
330   }
331 
LessThan(const key_type & a,const key_type & b)332   bool LessThan(const key_type &a, const key_type &b) const { return key_less_(a, b); }
333 
EqualOrLessThan(const key_type & a,const key_type & b)334   bool EqualOrLessThan(const key_type &a, const key_type &b) const { return !key_less_(b, a); }
335 
Equal(const key_type & a,const key_type & b)336   bool Equal(const key_type &a, const key_type &b) const { return !key_less_(a, b) && !key_less_(b, a); }
337 
338   IndexRc AllocateInner(InnerNode **p);
339 
340   IndexRc AllocateLeaf(LeafNode **p);
341 
342   template <typename node_type>
343   slot_type FindSlot(const node_type *node, const key_type &key, bool *duplicate = nullptr) const {
344     slot_type lo = 0;
345     while (lo < node->slotuse_ && key_comp()(node->keys_[node->slot_dir_[lo]], key)) {
346       ++lo;
347     }
348     bool keymatch = (lo < node->slotuse_ && Equal(key, node->keys_[node->slot_dir_[lo]]));
349     if (keymatch && !node->is_leafnode()) {
350       // For an inner node and we match a key during search, we should look into the next slot.
351       ++lo;
352     }
353     if (duplicate != nullptr) {
354       *duplicate = keymatch;
355     }
356     return lo;
357   }
358 
359   IndexRc LeafInsertKeyValue(LockPathCB *ins_cb, LeafNode *node, const key_type &key,
360                              std::unique_ptr<value_type> &&value, key_type *split_key, LeafNode **split_node);
361 
362   IndexRc InnerInsertKeyChild(InnerNode *node, const key_type &key, BaseNode *ptr, key_type *split_key,
363                               InnerNode **split_node);
364 
FindBranch(InnerNode * inner,slot_type slot)365   inline BaseNode *FindBranch(InnerNode *inner, slot_type slot) const {
366     BaseNode *child = nullptr;
367     if (slot == 0) {
368       child = inner->data_[0];
369     } else {
370       child = inner->data_[inner->slot_dir_[slot - 1] + 1];
371     }
372     return child;
373   }
374 
375   IndexRc InsertKeyValue(LockPathCB *ins_cb, BaseNode *n, const key_type &key, std::unique_ptr<value_type> &&value,
376                          key_type *split_key, BaseNode **split_node);
377 
378   IndexRc Locate(RWLock *parent_lock, bool forUpdate, BaseNode *top, const key_type &key, LeafNode **ln,
379                  slot_type *s) const;
380 
381  public:
382   class Iterator : public std::iterator<std::bidirectional_iterator_tag, value_type> {
383    public:
384     using reference = BPlusTree::value_type &;
385     using pointer = BPlusTree::value_type *;
386 
Iterator(BPlusTree * btree)387     explicit Iterator(BPlusTree *btree) : cur_(btree->leaf_nodes_.head), slot_(0), locked_(false) {}
388 
cur_(leaf)389     Iterator(LeafNode *leaf, slot_type slot, bool locked = false) : cur_(leaf), slot_(slot), locked_(locked) {}
390 
391     ~Iterator();
392 
393     explicit Iterator(const Iterator &);
394 
395     Iterator &operator=(const Iterator &lhs);
396 
397     explicit Iterator(Iterator &&) noexcept;
398 
399     Iterator &operator=(Iterator &&lhs);
400 
401     pointer operator->() const { return cur_->data_[cur_->slot_dir_[slot_]].get(); }
402 
403     reference operator*() const { return *(cur_->data_[cur_->slot_dir_[slot_]].get()); }
404 
key()405     const key_type &key() const { return cur_->keys_[cur_->slot_dir_[slot_]]; }
406 
value()407     value_type &value() const { return *(cur_->data_[cur_->slot_dir_[slot_]].get()); }
408 
409     // Prefix++
410     Iterator &operator++();
411 
412     // Postfix++
413     Iterator operator++(int);
414 
415     // Prefix--
416     Iterator &operator--();
417 
418     // Postfix--
419     Iterator operator--(int);
420 
421     bool operator==(const Iterator &x) const { return (x.cur_ == cur_) && (x.slot_ == slot_); }
422     bool operator!=(const Iterator &x) const { return (x.cur_ != cur_) || (x.slot_ != slot_); }
423 
LockShared()424     void LockShared() {
425       cur_->rw_lock_.LockShared();
426       locked_ = true;
427     }
428 
LockExclusive()429     void LockExclusive() {
430       cur_->rw_lock_.LockExclusive();
431       locked_ = true;
432     }
433 
Unlock()434     void Unlock() {
435       cur_->rw_lock_.Unlock();
436       locked_ = false;
437     }
438 
439    private:
440     typename BPlusTree::LeafNode *cur_;
441     slot_type slot_;
442     bool locked_;
443   };
444 
445   class ConstIterator : public std::iterator<std::bidirectional_iterator_tag, value_type> {
446    public:
447     using reference = BPlusTree::value_type &;
448     using pointer = BPlusTree::value_type *;
449 
ConstIterator(const BPlusTree * btree)450     explicit ConstIterator(const BPlusTree *btree) : cur_(btree->leaf_nodes_.head), slot_(0), locked_(false) {}
451 
452     ~ConstIterator();
453 
454     ConstIterator(const LeafNode *leaf, slot_type slot, bool locked = false)
cur_(leaf)455         : cur_(leaf), slot_(slot), locked_(locked) {}
456 
457     explicit ConstIterator(const ConstIterator &);
458 
459     ConstIterator &operator=(const ConstIterator &lhs);
460 
461     explicit ConstIterator(ConstIterator &&) noexcept;
462 
463     ConstIterator &operator=(ConstIterator &&lhs);
464 
465     pointer operator->() const { return cur_->data_[cur_->slot_dir_[slot_]].get(); }
466 
467     reference operator*() const { return *(cur_->data_[cur_->slot_dir_[slot_]].get()); }
468 
key()469     const key_type &key() const { return cur_->keys_[cur_->slot_dir_[slot_]]; }
470 
value()471     value_type &value() const { return *(cur_->data_[cur_->slot_dir_[slot_]].get()); }
472 
473     // Prefix++
474     ConstIterator &operator++();
475 
476     // Postfix++
477     ConstIterator operator++(int);
478 
479     // Prefix--
480     ConstIterator &operator--();
481 
482     // Postfix--
483     ConstIterator operator--(int);
484 
485     bool operator==(const ConstIterator &x) const { return (x.cur_ == cur_) && (x.slot_ == slot_); }
486     bool operator!=(const ConstIterator &x) const { return (x.cur_ != cur_) || (x.slot_ != slot_); }
487 
LockShared()488     void LockShared() {
489       cur_->rw_lock_.LockShared();
490       locked_ = true;
491     }
492 
LockExclusive()493     void LockExclusive() {
494       cur_->rw_lock_.LockExclusive();
495       locked_ = true;
496     }
497 
Unlock()498     void Unlock() {
499       cur_->rw_lock_.Unlock();
500       locked_ = false;
501     }
502 
503    private:
504     const typename BPlusTree::LeafNode *cur_;
505     slot_type slot_;
506     bool locked_;
507   };
508 
509   Iterator begin();
510   Iterator end();
511 
512   ConstIterator begin() const;
513   ConstIterator end() const;
514 
515   ConstIterator cbegin() const;
516   ConstIterator cend() const;
517 
518   // Locate the entry with key
519   std::pair<ConstIterator, bool> Search(const key_type &key) const;
520   std::pair<Iterator, bool> Search(const key_type &key);
521 
522   value_type operator[](key_type key);
523 };
524 }  // namespace dataset
525 }  // namespace mindspore
526 #endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_INDEX_H_
527 
528 #include "btree_impl.tpp"
529 #include "btree_iterator.tpp"
530