/**
 * Copyright 2023 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "runtime/device/gsm/pin_mem_pool.h"
#include <string>
#include <algorithm>
#include <cstdlib>
#include "utils/log_adapter.h"
#include "utils/ms_context.h"
#include "include/common/utils/offload_context.h"
#include "include/common/utils/comm_manager.h"
#include "include/backend/distributed/collective/collective_manager.h"

namespace mindspore {
namespace device {
namespace {
constexpr size_t kMemPoolAlignSize = 512;
constexpr char HCCL_WORLD_GROUP[] = "hccl_world_group";
constexpr char NCCL_WORLD_GROUP[] = "nccl_world_group";
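
// Query the local (per-node) rank size from the collective manager, using the world group that
// matches the current device target: HCCL for Ascend, NCCL for GPU. Falls back to 1 when the
// device target has no supported communication backend.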
size_t GetLocalRankSize() {
  auto ms_context = MsContext::GetInstance();
  MS_EXCEPTION_IF_NULL(ms_context);
  std::string backend = ms_context->get_param<std::string>(MS_CTX_DEVICE_TARGET);
  std::string world_group;
  if (backend == kAscendDevice || backend == kDavinciDevice) {
    world_group = HCCL_WORLD_GROUP;
  } else if (backend == kGPUDevice) {
    world_group = NCCL_WORLD_GROUP;
  } else {
    MS_LOG(ERROR) << "Invalid communication backend: " << backend
                  << ", currently only the Ascend and GPU backends are supported.";
    return 1;
  }
  const auto &collective_manager = distributed::collective::CollectiveManager::instance();
  MS_EXCEPTION_IF_NULL(collective_manager);
  return collective_manager->GetLocalGroupSize(world_group);
}
}  // namespace

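// The constructor only records whether pinned (page-locked) host memory is enabled in the
// offload context; the pool's size and block size are resolved lazily in Init().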
PinMemPool::PinMemPool() {
  const auto &offload_context = OffloadContext::GetInstance();
  pinned_mem_ = offload_context->enable_pinned_mem();
}

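// Allocate `size` bytes of host memory from the pool, initializing the pool on first use.
// The actual carving of memory out of the pool's blocks is done by AllocTensorMem, provided by
// the underlying dynamic memory pool.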
void *PinMemPool::AllocPinMem(size_t size) {
  if (!inited_) {
    Init();
  }
  return AllocTensorMem(size);
}

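// Allocate one backing block of host memory for the pool. Despite its name, which comes from the
// memory pool interface, this allocates host memory rather than device memory. Returns the
// allocated size on success and 0 on failure.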
size_t PinMemPool::AllocDeviceMem(size_t alloc_size, DeviceMemPtr *addr) {
  if (alloc_size == 0) {
    MS_LOG(EXCEPTION) << "The memory alloc size is 0.";
  }

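  // Windows uses plain malloc; elsewhere, allocate 512-byte aligned host memory, or pinned
  // memory via PinnedMemAlloc when pinned allocation is enabled.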
#if defined(_WIN32) || defined(_WIN64)
  *addr = malloc(alloc_size);
#else
  if (!pinned_mem_) {
    auto status = posix_memalign(addr, kMemPoolAlignSize, alloc_size);
    if (status != 0) {
      MS_LOG(ERROR) << "The PinMemPool posix_memalign failed, error code is " << status << ".";
      // posix_memalign does not guarantee *addr is set on failure, so clear it explicitly.
      *addr = nullptr;
    }
  } else {
    PinnedMemAlloc(addr, alloc_size);
  }
#endif
  if (*addr == nullptr) {
    MS_LOG(ERROR) << "Malloc memory failed.";
    return 0;
  }
  total_used_memory_ += alloc_size;
  MS_LOG(INFO) << "Current PinMemPool alloc size[" << alloc_size << "], total used size[" << total_used_memory_
               << "], available host mem size [" << max_size_ - total_used_memory_ << "].";
  return alloc_size;
}

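// Lazily initialize the pool under a lock: take the host memory budget from the offload context,
// and, when running distributed without an explicitly configured CPU offload size, split the
// default budget evenly across the local ranks before deriving the pool block size.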
void PinMemPool::Init() {
  std::lock_guard<std::mutex> lock(mutex_);
  if (inited_) {
    return;
  }
  const auto &offload_context = OffloadContext::GetInstance();
  auto cpu_mem_size = offload_context->offload_cpu_size();
  if (!mindspore::IsStandAlone() && !offload_context->cpu_size_configured()) {
    auto local_rank_size = GetLocalRankSize();
    if (local_rank_size == 0) {
      MS_LOG(ERROR) << "Local rank size cannot be 0, resetting it to 1.";
      local_rank_size = 1;
    }
    cpu_mem_size = cpu_mem_size / local_rank_size;
  }
  max_size_ = cpu_mem_size;
  SetMemPoolBlockSize(max_size_);
  inited_ = true;
  MS_LOG(INFO) << "PinMemPool init success.";
}

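// Clamp the pool's allocation unit (block) size to the available pin memory so that a single
// block never exceeds the configured host memory budget.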
void PinMemPool::SetMemPoolBlockSize(size_t available_pin_mem_size) {
  const auto &offload_context = OffloadContext::GetInstance();
  auto real_block_size = std::min(available_pin_mem_size, offload_context->host_mem_block_size());
  SetMemAllocUintSize(real_block_size);
}

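// Remaining host memory budget: the configured maximum minus what has already been allocated.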
size_t PinMemPool::free_mem_size() { return max_size_ - total_used_memory_; }
}  // namespace device
}  // namespace mindspore