/**
 * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_MEM_REUSE_MEM_DYNAMIC_ALLOCATOR_H_
#define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_MEM_REUSE_MEM_DYNAMIC_ALLOCATOR_H_

#include <cstddef>
#include <memory>
#include <map>
#include <vector>
#include <algorithm>
#include <functional>
#include <utility>
#include <thread>
#include <mutex>

namespace mindspore {
namespace device {
// Opaque handle to a device memory address.
using DeviceMemPtr = void *;

// The status of a memory buf: idle (available for reuse) or used.
enum DynamicMemBufStatus : int { kMemBufIdle, kMemBufUsed };

// Memory is allocated aligned to 512 bytes.
static constexpr size_t DYNAMIC_MEM_ALIGN_SIZE = 512;

// The minimum unit size (1G) of a memory block used for dynamic extension.
// The shift is done in size_t rather than int so the expression cannot overflow int if the unit is ever raised.
static constexpr size_t DYNAMIC_MEM_ALLOC_UNIT_SIZE = static_cast<size_t>(1024) << 20;

// The comparator of device addresses, from small to large.
// std::less is used instead of the built-in `<` because the built-in relational operators give an unspecified
// result for pointers that do not point into the same array/object, while std::less guarantees a strict total
// order over pointers, which is what an ordered-map key comparator requires.
struct DeviceAddrCmp {
  bool operator()(const DeviceMemPtr &addr1, const DeviceMemPtr &addr2) const {
    return std::less<DeviceMemPtr>()(addr1, addr2);
  }
};

// Memory buf is the smallest operation object of the dynamic memory pool.
struct DynamicMemBuf {
  // Stores the given address, status and size as-is.
  DynamicMemBuf(DeviceMemPtr addr, DynamicMemBufStatus status, size_t size)
      : device_addr_(addr), status_(status), size_(size) {}
  DeviceMemPtr device_addr_;
  DynamicMemBufStatus status_;
  size_t size_;
};
using DynamicMemBufPtr = std::shared_ptr<DynamicMemBuf>;
// Multimap key is the tensor size, for finding the idle memory buf by tensor size.
using SizeMapMemBuf = std::multimap<size_t, DynamicMemBufPtr>;
// Map key is the device address, for finding the used memory buf in a memory block by device address.
using DeviceAddrMapMemBuf = std::map<DeviceMemPtr, DynamicMemBufPtr, DeviceAddrCmp>;

// Memory block is composed of memory bufs.
// No destructor is declared: the buf map releases its entries when the block is destroyed, so an explicit
// clear() is unnecessary, and omitting it keeps the implicit copy and move operations available.
class DynamicMemBlock {
 public:
  // Creates an empty block with a null base address and zero size.
  DynamicMemBlock() = default;
  // Creates a block that records the given base address and size; no bufs are registered.
  DynamicMemBlock(DeviceMemPtr addr_base, size_t size) : device_addr_base_(addr_base), mem_block_size_(size) {}

  // Base device address of this block.
  const DeviceMemPtr &device_addr() const { return device_addr_base_; }
  // Size of this block as passed at construction.
  size_t size() const { return mem_block_size_; }

 private:
  // The pool manipulates the private members of the block directly.
  friend class DynamicMemPoolBestFit;

  // The map of all memory bufs in this memory block, keyed by device address.
  DeviceAddrMapMemBuf block_all_mem_buf_map_;

  DeviceMemPtr device_addr_base_{nullptr};
  size_t mem_block_size_{0};
};
using DynamicMemBlockPtr = std::shared_ptr<DynamicMemBlock>;

// The main class of the dynamic memory pool.
// The real device memory operations are pure virtual and must be provided by a device-specific subclass.
// The class holds a std::mutex member, so it is neither copyable nor movable.
class DynamicMemPoolBestFit {
 public:
  DynamicMemPoolBestFit() = default;
  virtual ~DynamicMemPoolBestFit();

  // The main program entry of memory alloc.
  DeviceMemPtr AllocTensorMem(size_t size);
  // The main program entry of continuous memory alloc.
  std::vector<DeviceMemPtr> AllocContinuousTensorMem(size_t total_size, std::vector<size_t> size_list);
  // The main program entry of memory free.
  void FreeTensorMem(const DeviceMemPtr &device_addr);

  // Release the real device memory.
  void ReleaseDeviceRes();
  // Display the information of memory blocks and memory bufs.
  void DumpDynamicMemPoolInfo();
  // Get the map of global idle mem bufs by size.
  // Returns a copy of the map, made while holding mutex_.
  SizeMapMemBuf global_idle_mem_buf_map() {
    std::lock_guard<std::mutex> locker(mutex_);
    return global_idle_mem_buf_map_;
  }

  // Get the minimum memory unit size used for dynamic extension.
  // NOTE: this accessor and the setter below do not take mutex_.
  size_t mem_alloc_unit_size() const { return mem_alloc_unit_size_; }
  // Set the minimum memory unit size used for dynamic extension.
  void set_mem_alloc_unit_size(const size_t &size) { mem_alloc_unit_size_ = size; }

  // Get the related memory statistics information.
  // NOTE: these accessors read the counters without taking mutex_.
  size_t total_mem_statistics() const { return total_mem_statistics_; }
  size_t used_mem_statistics() const { return total_used_mem_statistics_; }
  size_t used_mem_peak_statistics() const { return used_mem_peak_statistics_; }

  // The related interfaces of real device memory operations, to be overridden per device type.
  // NOTE(review): the size_t returned by AllocDeviceMem is presumably the size actually allocated and `addr`
  // the output address - confirm against the device implementations.
  virtual size_t AllocDeviceMem(size_t size, DeviceMemPtr *addr) = 0;
  virtual bool FreeDeviceMem(const DeviceMemPtr &addr) = 0;
  virtual size_t free_mem_size() = 0;
  virtual size_t total_mem_size() = 0;

 protected:
  // The real size of a memory alloc after alignment.
  virtual size_t AlignMemorySize(size_t size) const;
  // Calculate the alloc size required for a memory block when adding the memory block.
  virtual size_t CalMemBlockAllocSize(size_t size);

 private:
  // Find the idle memory buf by aligned size when allocating memory.
  DeviceMemPtr FindIdleMemBuf(size_t size);
  // Add the memory block and memory buf when memory alloc does not find an idle memory buf.
  DeviceMemPtr AddMemBlockAndMemBuf(size_t size);
  // Judge whether the memory buf needs to be divided, by alloc size and memory buf size.
  bool IsDivide(size_t tensor_size, size_t mem_buf_size) const;
  // Divide the memory buf by alloc size.
  void DivideMemBuf(size_t size, const DynamicMemBufPtr &mem_buf);
  // Find the memory block by device address.
  DynamicMemBlockPtr FindMemBlock(const DeviceMemPtr &device_addr);
  // The comparator of memory blocks by device address, because memory blocks are arranged in order by device
  // address.
  static bool CmpMemBlock(const DeviceMemPtr &device_addr, const DynamicMemBlockPtr &mem_block);

  // Combine the memory bufs when freeing memory, to avoid memory fragmentation.
  void CombineMemBuf(const DynamicMemBlockPtr &mem_block, const DeviceMemPtr &device_addr);
  // Erase the idle memory buf by size and device address when the idle memory buf is combined.
  void EraseIdleMemBuf(size_t size, const DeviceMemPtr &device_addr);

  // The global memory block list, arranged in order by the base device address of each memory block.
  std::vector<DynamicMemBlockPtr> global_mem_block_list_;
  // The map of all idle memory bufs by size.
  SizeMapMemBuf global_idle_mem_buf_map_;

  // The related memory statistics information.
  size_t total_mem_statistics_{0};
  size_t total_used_mem_statistics_{0};
  size_t used_mem_peak_statistics_{0};

  // The minimum memory unit size.
  size_t mem_alloc_unit_size_{DYNAMIC_MEM_ALLOC_UNIT_SIZE};

  // Support multi-thread.
  std::mutex mutex_;
};
}  // namespace device
}  // namespace mindspore

#endif  // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_MEM_REUSE_MEM_DYNAMIC_ALLOCATOR_H_