1 /**
2 * Copyright 2019-2020 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "runtime/device/ascend/ge_runtime/task/aicpu_task.h"
18 #include <vector>
19 #include "runtime/mem.h"
20 #include "runtime/kernel.h"
21 #include "runtime/device/ascend/ge_runtime/task/task_factory.h"
22 #include "aicpu/common/aicpu_task_struct.h"
23 #include "mindspore/core/utils/convert_utils_base.h"
24
25 namespace mindspore::ge::model_runner {
AicpuTask(const ModelContext & model_context,const std::shared_ptr<AicpuTaskInfo> & task_info)26 AicpuTask::AicpuTask(const ModelContext &model_context, const std::shared_ptr<AicpuTaskInfo> &task_info)
27 : TaskRepeater<AicpuTaskInfo>(model_context, task_info),
28 task_info_(task_info),
29 stream_(nullptr),
30 args_(nullptr),
31 ext_info_(nullptr),
32 input_output_addr_(nullptr) {
33 MS_EXCEPTION_IF_NULL(task_info_);
34
35 auto stream_list = model_context.stream_list();
36 if (stream_list.size() == 1) {
37 stream_ = stream_list[0];
38 } else if (stream_list.size() > task_info_->stream_id()) {
39 stream_ = stream_list[task_info_->stream_id()];
40 } else {
41 MS_LOG(EXCEPTION) << "Index: " << task_info_->stream_id() << " >= stream_list.size(): " << stream_list.size();
42 }
43 }
44
~AicpuTask()45 AicpuTask::~AicpuTask() {
46 ReleaseRtMem(&args_);
47 ReleaseRtMem(&ext_info_);
48 }
49
Distribute()50 void AicpuTask::Distribute() {
51 MS_LOG(INFO) << "InitAicpuTask start.";
52 std::vector<void *> io_addrs;
53 io_addrs.insert(io_addrs.end(), task_info_->input_data_addrs().begin(), task_info_->input_data_addrs().end());
54 io_addrs.insert(io_addrs.end(), task_info_->output_data_addrs().begin(), task_info_->output_data_addrs().end());
55 auto io_addrs_num = static_cast<uint32_t>(io_addrs.size());
56 auto io_addrs_size = static_cast<uint32_t>(io_addrs_num * sizeof(void *));
57 constexpr uint32_t io_addr_offset = sizeof(aicpu::AicpuParamHead);
58 uint32_t node_def_len_offset = io_addr_offset + io_addrs_size;
59 uint32_t node_def_addr_offset = node_def_len_offset + sizeof(uint32_t);
60 uint32_t args_size = sizeof(aicpu::AicpuParamHead) + io_addrs_size +
61 static_cast<uint32_t>(task_info_->node_def().size()) + sizeof(uint32_t);
62
63 // Malloc device memory for args
64 rtError_t rt_ret = rtMalloc(&args_, args_size, RT_MEMORY_HBM);
65 if (rt_ret != RT_ERROR_NONE) {
66 MS_LOG(EXCEPTION) << "Call rt api rtMalloc failed, ret: " << rt_ret;
67 }
68
69 SetAicpuParamHead(args_size, io_addrs_num);
70 SetInputOutputAddrs(io_addrs, io_addr_offset);
71 SetNodeDef(node_def_len_offset, node_def_addr_offset);
72
73 // for data dump
74 input_output_addr_ = reinterpret_cast<void *>(reinterpret_cast<uint8_t *>(args_) + io_addr_offset);
75 auto dump_flag = task_info_->dump_flag() ? RT_KERNEL_DUMPFLAG : RT_KERNEL_DEFAULT;
76
77 MS_LOG(INFO) << "Distribute AicpuTask start, args_size = " << args_size << ", io_addrs_num =" << io_addrs_num
78 << ", so_name = " << task_info_->so_name() << ", kernel_name = " << task_info_->kernel_name()
79 << ", dump_flag = " << dump_flag;
80 rt_ret = rtCpuKernelLaunchWithFlag(reinterpret_cast<const void *>(task_info_->so_name().data()),
81 reinterpret_cast<const void *>(task_info_->kernel_name().data()), 1, args_,
82 args_size, nullptr, stream_, dump_flag);
83 if (rt_ret != RT_ERROR_NONE) {
84 MS_LOG(EXCEPTION) << "Call rt api rtCpuKernelLaunchWithFlag failed, ret: " << rt_ret;
85 }
86
87 MS_LOG(INFO) << "Distribute AicpuTask end.";
88 }
89
ReleaseRtMem(void ** ptr)90 void AicpuTask::ReleaseRtMem(void **ptr) noexcept {
91 if (ptr == nullptr || *ptr == nullptr) {
92 return;
93 }
94
95 rtError_t rt_ret = rtFree(*ptr);
96 if (rt_ret != RT_ERROR_NONE) {
97 return;
98 }
99 *ptr = nullptr;
100 }
101
SetAicpuParamHead(uint32_t args_size,uint32_t io_addrs_num)102 void AicpuTask::SetAicpuParamHead(uint32_t args_size, uint32_t io_addrs_num) {
103 aicpu::AicpuParamHead aicpu_param_head;
104 aicpu_param_head.length = args_size;
105 aicpu_param_head.ioAddrNum = io_addrs_num;
106
107 const auto &ext_info = task_info_->ext_info();
108 uint32_t ext_size = SizeToUint(ext_info.size());
109 if (ext_info.empty()) {
110 aicpu_param_head.extInfoLength = 0;
111 aicpu_param_head.extInfoAddr = 0;
112 } else {
113 rtError_t flag = rtMalloc(&ext_info_, ext_size, RT_MEMORY_HBM);
114 if (flag != RT_ERROR_NONE) {
115 MS_LOG(EXCEPTION) << "Call rt api rtMalloc failed, ret: " << flag;
116 }
117
118 flag = rtMemcpy(ext_info_, ext_size, const_cast<void *>(reinterpret_cast<const void *>(ext_info.data())), ext_size,
119 RT_MEMCPY_HOST_TO_DEVICE);
120 if (flag != RT_ERROR_NONE) {
121 MS_LOG(EXCEPTION) << "Call rt api rtMemcpy failed, ret: " << flag;
122 }
123
124 MS_LOG(INFO) << "ext info size: " << ext_size;
125 aicpu_param_head.extInfoLength = ext_size;
126 aicpu_param_head.extInfoAddr = reinterpret_cast<uintptr_t>(ext_info_);
127 }
128
129 // Memcpy AicpuParamHead
130 auto rt_ret = rtMemcpy(args_, sizeof(aicpu::AicpuParamHead), reinterpret_cast<void *>(&aicpu_param_head),
131 sizeof(aicpu::AicpuParamHead), RT_MEMCPY_HOST_TO_DEVICE);
132 if (rt_ret != RT_ERROR_NONE) {
133 MS_LOG(EXCEPTION) << "Call rt api rtMemcpy failed, ret: " << rt_ret;
134 }
135 }
136
SetInputOutputAddrs(const std::vector<void * > & io_addrs,uint32_t io_addr_offset)137 void AicpuTask::SetInputOutputAddrs(const std::vector<void *> &io_addrs, uint32_t io_addr_offset) {
138 // Memcpy io addrs
139 if (!io_addrs.empty()) {
140 auto rt_ret = rtMemcpy(reinterpret_cast<void *>(reinterpret_cast<uint8_t *>(args_) + io_addr_offset),
141 static_cast<uint32_t>(io_addrs.size()) * sizeof(void *), io_addrs.data(),
142 static_cast<uint32_t>(io_addrs.size()) * sizeof(void *), RT_MEMCPY_HOST_TO_DEVICE);
143 if (rt_ret != RT_ERROR_NONE) {
144 MS_LOG(EXCEPTION) << "Call rt api rtMemcpy failed, ret: " << rt_ret;
145 }
146 }
147 }
148
SetNodeDef(uint32_t node_def_len_offset,uint32_t node_def_addr_offset)149 void AicpuTask::SetNodeDef(uint32_t node_def_len_offset, uint32_t node_def_addr_offset) {
150 // Memcpy node def
151 auto size = task_info_->node_def().size();
152 auto rt_ret =
153 rtMemcpy(reinterpret_cast<void *>(reinterpret_cast<uint8_t *>(args_) + node_def_len_offset), sizeof(uint32_t),
154 reinterpret_cast<const void *>(&size), sizeof(uint32_t), RT_MEMCPY_HOST_TO_DEVICE);
155 if (rt_ret != RT_ERROR_NONE) {
156 MS_LOG(EXCEPTION) << "Call rt api rtMemcpy failed, ret: " << rt_ret;
157 }
158
159 // Memcpy node def
160 rt_ret = rtMemcpy(reinterpret_cast<void *>(reinterpret_cast<uint8_t *>(args_) + node_def_addr_offset),
161 task_info_->node_def().size(), reinterpret_cast<const void *>(task_info_->node_def().data()),
162 task_info_->node_def().size(), RT_MEMCPY_HOST_TO_DEVICE);
163 if (rt_ret != RT_ERROR_NONE) {
164 MS_LOG(EXCEPTION) << "Call rt api rtMemcpy failed, ret: " << rt_ret;
165 }
166 }
167
// Invokes REGISTER_TASK with TaskInfoType::AICPU, AicpuTask and AicpuTaskInfo.
// NOTE(review): presumably this makes the task factory build an AicpuTask for AICPU task infos;
// the macro is not defined in this file (likely task_factory.h) — confirm the mechanics there.
168 REGISTER_TASK(TaskInfoType::AICPU, AicpuTask, AicpuTaskInfo);
169 } // namespace mindspore::ge::model_runner
170