• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_CACHE_POOL_H_
17 #define MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_CACHE_POOL_H_
18 
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <mutex>
#include <string>
#include <utility>
#include <vector>
#include "minddata/dataset/engine/cache/cache_common.h"
#include "minddata/dataset/engine/cache/cache_numa.h"
#include "minddata/dataset/engine/cache/storage_manager.h"
#include "minddata/dataset/util/allocator.h"
#include "minddata/dataset/util/service.h"
#include "minddata/dataset/util/slice.h"
#include "minddata/dataset/util/auto_index.h"
#include "minddata/dataset/util/btree.h"
32 
33 namespace mindspore {
34 namespace dataset {
35 /// \brief A CachePool provides service for backup/restore a buffer. A buffer can be represented in a form of vector of
36 /// ReadableSlice where all memory blocks will be copied to one contiguous block which can be in memory or spilled to
37 /// disk (if a disk directory is provided). User must provide a key to insert the buffer.
38 /// \see ReadableSlice
39 class CachePool : public Service {
40  public:
41   using base_type = uint8_t;
42   using pointer = base_type *;
43   using const_pointer = const base_type *;
44   using reference = base_type &;
45   using const_reference = const base_type &;
46   using value_allocator = Allocator<base_type>;
47 
48   // An internal class to locate the whereabouts of a backed up buffer which can be either in
49   class DataLocator {
50    public:
DataLocator()51     DataLocator() : ptr(nullptr), sz(0), node_id(0), node_hit(false), storage_key(0) {}
52     ~DataLocator() = default;
53     DataLocator(const DataLocator &other) = default;
54     DataLocator &operator=(const DataLocator &other) = default;
DataLocator(DataLocator && other)55     DataLocator(DataLocator &&other) noexcept {
56       ptr = other.ptr;
57       sz = other.sz;
58       node_id = other.node_id;
59       node_hit = other.node_hit;
60       storage_key = other.storage_key;
61       other.ptr = nullptr;
62       other.sz = 0;
63       other.storage_key = 0;
64     }
65     DataLocator &operator=(DataLocator &&other) noexcept {
66       if (&other != this) {
67         ptr = other.ptr;
68         sz = other.sz;
69         node_id = other.node_id;
70         node_hit = other.node_hit;
71         storage_key = other.storage_key;
72         other.ptr = nullptr;
73         other.sz = 0;
74         other.storage_key = 0;
75       }
76       return *this;
77     }
78     pointer ptr;
79     size_t sz;
80     numa_id_t node_id;  // where the numa node the memory is allocated to
81     bool node_hit;      // we can allocate to the preferred node
82     StorageManager::key_type storage_key;
83   };
84 
85   using data_index = BPlusTree<int64_t, DataLocator>;
86   using key_type = data_index::key_type;
87   using bl_alloc_type = typename value_allocator::template rebind<DataLocator>::other;
88 
89   /// \brief Simple statistics returned from CachePool like how many elements are cached in memory and
90   /// how many elements are spilled to disk.
91   struct CacheStat {
92     key_type min_key;
93     key_type max_key;
94     int64_t num_mem_cached;
95     int64_t num_disk_cached;
96     int64_t average_cache_sz;
97     int64_t num_numa_hit;
98     std::vector<key_type> gap;
99   };
100 
101   /// \brief Constructor
102   /// \param alloc Allocator to allocate memory from
103   /// \param root Optional disk folder to spill
104   explicit CachePool(std::shared_ptr<NumaMemoryPool> mp, const std::string &root = "");
105 
106   CachePool(const CachePool &) = delete;
107   CachePool(CachePool &&) = delete;
108   CachePool &operator=(const CachePool &) = delete;
109   CachePool &operator=(CachePool &&) = delete;
110   ~CachePool() noexcept override;
111 
112   Status DoServiceStart() override;
113   Status DoServiceStop() override;
114 
115   Path GetSpillPath() const;
116 
117   /// \brief Insert a sequence of ReadableSlice objects into the pool.
118   /// All memory blocks will be consolidated into one contiguous block and be cached in either memory or on disk.
119   /// \param[in] key User supplied key
120   /// \param[in] buf A sequence of ReadableSlice objects.
121   /// \param[in] writeToDiskDirectly If true, no spill to disk if spill is enabled, or return no memory
122   /// \return Error code
123   Status Insert(CachePool::key_type key, const std::vector<ReadableSlice> &buf);
124 
125   /// \brief Restore a cached buffer (from memory or disk)
126   /// \param[in] key A previous key returned from Insert
127   /// \param[out] dest The cached buffer will be copied to this destination represented by a WritableSlice
128   /// \param[out] bytesRead Optional. Number of bytes read.
129   /// \return Error code
130   Status Read(key_type key, WritableSlice *dest, size_t *bytesRead = nullptr) const;
131 
132   /// \brief Serialize a DataLocator
133   Status GetDataLocator(key_type, const std::shared_ptr<flatbuffers::FlatBufferBuilder> &,
134                         flatbuffers::Offset<DataLocatorMsg> *) const;
135 
136   /// \brief Get statistics.
137   /// \return CacheStat object
138   CacheStat GetStat(bool GetMissingKeys = false) const;
139 
MyName()140   std::string MyName() const { return subfolder_; }
141 
142   /// \brief Toggle locking
143   /// \note Once locking is off. It is user's responsibility to ensure concurrency
SetLocking(bool on_off)144   void SetLocking(bool on_off) { tree_->SetLocking(on_off); }
145 
146  private:
147   std::shared_ptr<NumaMemoryPool> mp_;
148   Path root_;
149   const std::string subfolder_;
150   std::shared_ptr<StorageManager> sm_;
151   std::shared_ptr<data_index> tree_;
152   std::atomic<uint64_t> soft_mem_limit_;  // the available memory in the machine
153   std::atomic<uint64_t> temp_mem_usage_;  // temporary count on the amount of memory usage by cache every 100Mb (because
154                                           // we will adjust soft_mem_limit_ every 100Mb based on this parameter)
155   uint64_t min_avail_mem_;                // lower bound of the available memory
156   const int kMemoryCapAdjustInterval = 104857600;
157 };
158 }  // namespace dataset
159 }  // namespace mindspore
160 #endif
161