• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2019 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_MEM_REUSE_MEM_DYNAMIC_ALLOCATOR_H_
18 #define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_MEM_REUSE_MEM_DYNAMIC_ALLOCATOR_H_
19 
#include <algorithm>
#include <functional>
#include <map>
#include <memory>
#include <mutex>
#include <thread>
#include <utility>
#include <vector>
27 
28 namespace mindspore {
29 namespace device {
// Untyped pointer used throughout this header as a device memory address.
using DeviceMemPtr = void *;
31 
// State of a memory buf: idle or in use.
enum DynamicMemBufStatus : int {
  kMemBufIdle = 0,
  kMemBufUsed = 1,
};
34 
// Alignment, in bytes, applied to memory allocations (512).
static constexpr size_t DYNAMIC_MEM_ALIGN_SIZE{512};
37 
// Minimum size of a memory block used when the pool is dynamically extended: 1G (2^30).
static constexpr size_t DYNAMIC_MEM_ALLOC_UNIT_SIZE = static_cast<size_t>(1) << 30;
40 
41 // The Comparator of device address from small to large.
42 struct DeviceAddrCmp {
operatorDeviceAddrCmp43   bool operator()(const DeviceMemPtr &addr1, const DeviceMemPtr &addr2) const { return addr1 < addr2; }
44 };
45 
46 // Memory buf is the smallest operation object of dynamic memory pool.
47 struct DynamicMemBuf {
DynamicMemBufDynamicMemBuf48   DynamicMemBuf(DeviceMemPtr addr, DynamicMemBufStatus status, size_t size)
49       : device_addr_(addr), status_(status), size_(size) {}
50   DeviceMemPtr device_addr_;
51   DynamicMemBufStatus status_;
52   size_t size_;
53 };
54 using DynamicMemBufPtr = std::shared_ptr<DynamicMemBuf>;
55 // Multimap key is the tensor size, for finding the idle memory buf by tensor size.
56 using SizeMapMemBuf = std::multimap<size_t, DynamicMemBufPtr>;
57 // Map key is the device address, for finding the used memory buf in memory block by device address.
58 using DeviceAddrMapMemBuf = std::map<DeviceMemPtr, DynamicMemBufPtr, DeviceAddrCmp>;
59 
60 // Memory block is composed of memory buf.
61 class DynamicMemBlock {
62  public:
63   DynamicMemBlock() = default;
DynamicMemBlock(DeviceMemPtr addr_base,size_t size)64   DynamicMemBlock(DeviceMemPtr addr_base, size_t size) : device_addr_base_(addr_base), mem_block_size_(size) {}
~DynamicMemBlock()65   ~DynamicMemBlock() { block_all_mem_buf_map_.clear(); }
device_addr()66   const DeviceMemPtr &device_addr() const { return device_addr_base_; }
size()67   size_t size() const { return mem_block_size_; }
68 
69  private:
70   friend class DynamicMemPoolBestFit;
71 
72   // The map of all memory buf in this memory block by device address.
73   DeviceAddrMapMemBuf block_all_mem_buf_map_;
74 
75   DeviceMemPtr device_addr_base_{nullptr};
76   size_t mem_block_size_{0};
77 };
78 using DynamicMemBlockPtr = std::shared_ptr<DynamicMemBlock>;
79 
80 // The main class of dynamic memory pool.
81 class DynamicMemPoolBestFit {
82  public:
83   DynamicMemPoolBestFit() = default;
84   virtual ~DynamicMemPoolBestFit();
85 
86   // The main program entry of memory alloc.
87   DeviceMemPtr AllocTensorMem(size_t size);
88   // The main program entry of continuous memory alloc.
89   std::vector<DeviceMemPtr> AllocContinuousTensorMem(size_t total_size, std::vector<size_t> size_list);
90   // The main program entry of memory free.
91   void FreeTensorMem(const DeviceMemPtr &device_addr);
92 
93   // Release the real device memory.
94   void ReleaseDeviceRes();
95   // Display the information of memory block and memory buf.
96   void DumpDynamicMemPoolInfo();
97   // Get the map of global idle mem buf and size.
global_idle_mem_buf_map()98   SizeMapMemBuf global_idle_mem_buf_map() {
99     std::lock_guard<std::mutex> locker(mutex_);
100     return global_idle_mem_buf_map_;
101   }
102 
103   // Get the minimum memory unit size using for dynamic extend.
mem_alloc_unit_size()104   size_t mem_alloc_unit_size() const { return mem_alloc_unit_size_; }
105   // Set the minimum memory unit size using for dynamic extend.
set_mem_alloc_unit_size(const size_t & size)106   void set_mem_alloc_unit_size(const size_t &size) { mem_alloc_unit_size_ = size; }
107 
108   // Get the related memory statistics information.
total_mem_statistics()109   size_t total_mem_statistics() const { return total_mem_statistics_; }
used_mem_statistics()110   size_t used_mem_statistics() const { return total_used_mem_statistics_; }
used_mem_peak_statistics()111   size_t used_mem_peak_statistics() const { return used_mem_peak_statistics_; }
112 
113   // The related interface of device memory real operation, needs override by device type.
114   virtual size_t AllocDeviceMem(size_t size, DeviceMemPtr *addr) = 0;
115   virtual bool FreeDeviceMem(const DeviceMemPtr &addr) = 0;
116   virtual size_t free_mem_size() = 0;
117   virtual size_t total_mem_size() = 0;
118 
119  protected:
120   // The real size by memory alloc aligned.
121   virtual size_t AlignMemorySize(size_t size) const;
122   // Calculate memory block required alloc size when adding the memory block.
123   virtual size_t CalMemBlockAllocSize(size_t size);
124 
125  private:
126   // Find the idle memory buf by aligned size when memory alloc.
127   DeviceMemPtr FindIdleMemBuf(size_t size);
128   // Add the memory block and memory buf when memory alloc not find the idle memory buf.
129   DeviceMemPtr AddMemBlockAndMemBuf(size_t size);
130   // Judge whether need divide the memory buf by alloc size and memory buf size.
131   bool IsDivide(size_t tensor_size, size_t mem_buf_size) const;
132   // Divide the memory buf by alloc size.
133   void DivideMemBuf(size_t size, const DynamicMemBufPtr &mem_buf);
134   // Find the memory block by device address.
135   DynamicMemBlockPtr FindMemBlock(const DeviceMemPtr &device_addr);
136   // The Comparator of memory block by device address, because memory blocks are arranged in order by device address.
137   static bool CmpMemBlock(const DeviceMemPtr &device_addr, const DynamicMemBlockPtr &mem_block);
138 
139   // Combine the memory buf when memory free, to avoid the memory fragmentation.
140   void CombineMemBuf(const DynamicMemBlockPtr &mem_block, const DeviceMemPtr &device_addr);
141   // Erase the idle memory buf by size and device address when idle memory buf is combined.
142   void EraseIdleMemBuf(size_t size, const DeviceMemPtr &device_addr);
143 
144   // The global memory block list which is arranged in order by base device address of memory block.
145   std::vector<DynamicMemBlockPtr> global_mem_block_list_;
146   // The map of all idle memory buf by size.
147   SizeMapMemBuf global_idle_mem_buf_map_;
148 
149   // The related memory statistics information.
150   size_t total_mem_statistics_{0};
151   size_t total_used_mem_statistics_{0};
152   size_t used_mem_peak_statistics_{0};
153 
154   // The minimum memory unit size.
155   size_t mem_alloc_unit_size_{DYNAMIC_MEM_ALLOC_UNIT_SIZE};
156 
157   // Support multi-thread.
158   std::mutex mutex_;
159 };
160 }  // namespace device
161 }  // namespace mindspore
162 
163 #endif  // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_MEM_REUSE_MEM_DYNAMIC_ALLOCATOR_H_
164