/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_CACHE_POOL_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_CACHE_POOL_H_

#include <atomic>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <mutex>
#include <string>
#include <utility>
#include <vector>
#include "minddata/dataset/engine/cache/cache_common.h"
#include "minddata/dataset/engine/cache/cache_numa.h"
#include "minddata/dataset/engine/cache/storage_manager.h"
#include "minddata/dataset/util/allocator.h"
#include "minddata/dataset/util/service.h"
#include "minddata/dataset/util/slice.h"
#include "minddata/dataset/util/auto_index.h"
#include "minddata/dataset/util/btree.h"

namespace mindspore {
namespace dataset {
/// \brief A CachePool provides service for backup/restore a buffer. A buffer can be represented in a form of vector of
/// ReadableSlice where all memory blocks will be copied to one contiguous block which can be in memory or spilled to
/// disk (if a disk directory is provided). User must provide a key to insert the buffer.
38 /// \see ReadableSlice 39 class CachePool : public Service { 40 public: 41 using base_type = uint8_t; 42 using pointer = base_type *; 43 using const_pointer = const base_type *; 44 using reference = base_type &; 45 using const_reference = const base_type &; 46 using value_allocator = Allocator<base_type>; 47 48 // An internal class to locate the whereabouts of a backed up buffer which can be either in 49 class DataLocator { 50 public: DataLocator()51 DataLocator() : ptr(nullptr), sz(0), node_id(0), node_hit(false), storage_key(0) {} 52 ~DataLocator() = default; 53 DataLocator(const DataLocator &other) = default; 54 DataLocator &operator=(const DataLocator &other) = default; DataLocator(DataLocator && other)55 DataLocator(DataLocator &&other) noexcept { 56 ptr = other.ptr; 57 sz = other.sz; 58 node_id = other.node_id; 59 node_hit = other.node_hit; 60 storage_key = other.storage_key; 61 other.ptr = nullptr; 62 other.sz = 0; 63 other.storage_key = 0; 64 } 65 DataLocator &operator=(DataLocator &&other) noexcept { 66 if (&other != this) { 67 ptr = other.ptr; 68 sz = other.sz; 69 node_id = other.node_id; 70 node_hit = other.node_hit; 71 storage_key = other.storage_key; 72 other.ptr = nullptr; 73 other.sz = 0; 74 other.storage_key = 0; 75 } 76 return *this; 77 } 78 pointer ptr; 79 size_t sz; 80 numa_id_t node_id; // where the numa node the memory is allocated to 81 bool node_hit; // we can allocate to the preferred node 82 StorageManager::key_type storage_key; 83 }; 84 85 using data_index = BPlusTree<int64_t, DataLocator>; 86 using key_type = data_index::key_type; 87 using bl_alloc_type = typename value_allocator::template rebind<DataLocator>::other; 88 89 /// \brief Simple statistics returned from CachePool like how many elements are cached in memory and 90 /// how many elements are spilled to disk. 
91 struct CacheStat { 92 key_type min_key; 93 key_type max_key; 94 int64_t num_mem_cached; 95 int64_t num_disk_cached; 96 int64_t average_cache_sz; 97 int64_t num_numa_hit; 98 std::vector<key_type> gap; 99 }; 100 101 /// \brief Constructor 102 /// \param alloc Allocator to allocate memory from 103 /// \param root Optional disk folder to spill 104 explicit CachePool(std::shared_ptr<NumaMemoryPool> mp, const std::string &root = ""); 105 106 CachePool(const CachePool &) = delete; 107 CachePool(CachePool &&) = delete; 108 CachePool &operator=(const CachePool &) = delete; 109 CachePool &operator=(CachePool &&) = delete; 110 ~CachePool() noexcept override; 111 112 Status DoServiceStart() override; 113 Status DoServiceStop() override; 114 115 Path GetSpillPath() const; 116 117 /// \brief Insert a sequence of ReadableSlice objects into the pool. 118 /// All memory blocks will be consolidated into one contiguous block and be cached in either memory or on disk. 119 /// \param[in] key User supplied key 120 /// \param[in] buf A sequence of ReadableSlice objects. 121 /// \param[in] writeToDiskDirectly If true, no spill to disk if spill is enabled, or return no memory 122 /// \return Error code 123 Status Insert(CachePool::key_type key, const std::vector<ReadableSlice> &buf); 124 125 /// \brief Restore a cached buffer (from memory or disk) 126 /// \param[in] key A previous key returned from Insert 127 /// \param[out] dest The cached buffer will be copied to this destination represented by a WritableSlice 128 /// \param[out] bytesRead Optional. Number of bytes read. 129 /// \return Error code 130 Status Read(key_type key, WritableSlice *dest, size_t *bytesRead = nullptr) const; 131 132 /// \brief Serialize a DataLocator 133 Status GetDataLocator(key_type, const std::shared_ptr<flatbuffers::FlatBufferBuilder> &, 134 flatbuffers::Offset<DataLocatorMsg> *) const; 135 136 /// \brief Get statistics. 
137 /// \return CacheStat object 138 CacheStat GetStat(bool GetMissingKeys = false) const; 139 MyName()140 std::string MyName() const { return subfolder_; } 141 142 /// \brief Toggle locking 143 /// \note Once locking is off. It is user's responsibility to ensure concurrency SetLocking(bool on_off)144 void SetLocking(bool on_off) { tree_->SetLocking(on_off); } 145 146 private: 147 std::shared_ptr<NumaMemoryPool> mp_; 148 Path root_; 149 const std::string subfolder_; 150 std::shared_ptr<StorageManager> sm_; 151 std::shared_ptr<data_index> tree_; 152 std::atomic<uint64_t> soft_mem_limit_; // the available memory in the machine 153 std::atomic<uint64_t> temp_mem_usage_; // temporary count on the amount of memory usage by cache every 100Mb (because 154 // we will adjust soft_mem_limit_ every 100Mb based on this parameter) 155 uint64_t min_avail_mem_; // lower bound of the available memory 156 const int kMemoryCapAdjustInterval = 104857600; 157 }; 158 } // namespace dataset 159 } // namespace mindspore 160 #endif 161