• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2019-2020 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "runtime/device/ascend/ge_runtime/task/aicpu_task.h"
18 #include <vector>
19 #include "runtime/mem.h"
20 #include "runtime/kernel.h"
21 #include "runtime/device/ascend/ge_runtime/task/task_factory.h"
22 #include "aicpu/common/aicpu_task_struct.h"
23 #include "mindspore/core/utils/convert_utils_base.h"
24 
25 namespace mindspore::ge::model_runner {
AicpuTask(const ModelContext & model_context,const std::shared_ptr<AicpuTaskInfo> & task_info)26 AicpuTask::AicpuTask(const ModelContext &model_context, const std::shared_ptr<AicpuTaskInfo> &task_info)
27     : TaskRepeater<AicpuTaskInfo>(model_context, task_info),
28       task_info_(task_info),
29       stream_(nullptr),
30       args_(nullptr),
31       ext_info_(nullptr),
32       input_output_addr_(nullptr) {
33   MS_EXCEPTION_IF_NULL(task_info_);
34 
35   auto stream_list = model_context.stream_list();
36   if (stream_list.size() == 1) {
37     stream_ = stream_list[0];
38   } else if (stream_list.size() > task_info_->stream_id()) {
39     stream_ = stream_list[task_info_->stream_id()];
40   } else {
41     MS_LOG(EXCEPTION) << "Index: " << task_info_->stream_id() << " >= stream_list.size(): " << stream_list.size();
42   }
43 }
44 
~AicpuTask()45 AicpuTask::~AicpuTask() {
46   ReleaseRtMem(&args_);
47   ReleaseRtMem(&ext_info_);
48 }
49 
Distribute()50 void AicpuTask::Distribute() {
51   MS_LOG(INFO) << "InitAicpuTask start.";
52   std::vector<void *> io_addrs;
53   io_addrs.insert(io_addrs.end(), task_info_->input_data_addrs().begin(), task_info_->input_data_addrs().end());
54   io_addrs.insert(io_addrs.end(), task_info_->output_data_addrs().begin(), task_info_->output_data_addrs().end());
55   auto io_addrs_num = static_cast<uint32_t>(io_addrs.size());
56   auto io_addrs_size = static_cast<uint32_t>(io_addrs_num * sizeof(void *));
57   constexpr uint32_t io_addr_offset = sizeof(aicpu::AicpuParamHead);
58   uint32_t node_def_len_offset = io_addr_offset + io_addrs_size;
59   uint32_t node_def_addr_offset = node_def_len_offset + sizeof(uint32_t);
60   uint32_t args_size = sizeof(aicpu::AicpuParamHead) + io_addrs_size +
61                        static_cast<uint32_t>(task_info_->node_def().size()) + sizeof(uint32_t);
62 
63   // Malloc device memory for args
64   rtError_t rt_ret = rtMalloc(&args_, args_size, RT_MEMORY_HBM);
65   if (rt_ret != RT_ERROR_NONE) {
66     MS_LOG(EXCEPTION) << "Call rt api rtMalloc failed, ret: " << rt_ret;
67   }
68 
69   SetAicpuParamHead(args_size, io_addrs_num);
70   SetInputOutputAddrs(io_addrs, io_addr_offset);
71   SetNodeDef(node_def_len_offset, node_def_addr_offset);
72 
73   // for data dump
74   input_output_addr_ = reinterpret_cast<void *>(reinterpret_cast<uint8_t *>(args_) + io_addr_offset);
75   auto dump_flag = task_info_->dump_flag() ? RT_KERNEL_DUMPFLAG : RT_KERNEL_DEFAULT;
76 
77   MS_LOG(INFO) << "Distribute AicpuTask start, args_size = " << args_size << ", io_addrs_num =" << io_addrs_num
78                << ", so_name = " << task_info_->so_name() << ", kernel_name = " << task_info_->kernel_name()
79                << ", dump_flag = " << dump_flag;
80   rt_ret = rtCpuKernelLaunchWithFlag(reinterpret_cast<const void *>(task_info_->so_name().data()),
81                                      reinterpret_cast<const void *>(task_info_->kernel_name().data()), 1, args_,
82                                      args_size, nullptr, stream_, dump_flag);
83   if (rt_ret != RT_ERROR_NONE) {
84     MS_LOG(EXCEPTION) << "Call rt api rtCpuKernelLaunchWithFlag failed, ret: " << rt_ret;
85   }
86 
87   MS_LOG(INFO) << "Distribute AicpuTask end.";
88 }
89 
ReleaseRtMem(void ** ptr)90 void AicpuTask::ReleaseRtMem(void **ptr) noexcept {
91   if (ptr == nullptr || *ptr == nullptr) {
92     return;
93   }
94 
95   rtError_t rt_ret = rtFree(*ptr);
96   if (rt_ret != RT_ERROR_NONE) {
97     return;
98   }
99   *ptr = nullptr;
100 }
101 
SetAicpuParamHead(uint32_t args_size,uint32_t io_addrs_num)102 void AicpuTask::SetAicpuParamHead(uint32_t args_size, uint32_t io_addrs_num) {
103   aicpu::AicpuParamHead aicpu_param_head;
104   aicpu_param_head.length = args_size;
105   aicpu_param_head.ioAddrNum = io_addrs_num;
106 
107   const auto &ext_info = task_info_->ext_info();
108   uint32_t ext_size = SizeToUint(ext_info.size());
109   if (ext_info.empty()) {
110     aicpu_param_head.extInfoLength = 0;
111     aicpu_param_head.extInfoAddr = 0;
112   } else {
113     rtError_t flag = rtMalloc(&ext_info_, ext_size, RT_MEMORY_HBM);
114     if (flag != RT_ERROR_NONE) {
115       MS_LOG(EXCEPTION) << "Call rt api rtMalloc failed, ret: " << flag;
116     }
117 
118     flag = rtMemcpy(ext_info_, ext_size, const_cast<void *>(reinterpret_cast<const void *>(ext_info.data())), ext_size,
119                     RT_MEMCPY_HOST_TO_DEVICE);
120     if (flag != RT_ERROR_NONE) {
121       MS_LOG(EXCEPTION) << "Call rt api rtMemcpy failed, ret: " << flag;
122     }
123 
124     MS_LOG(INFO) << "ext info size: " << ext_size;
125     aicpu_param_head.extInfoLength = ext_size;
126     aicpu_param_head.extInfoAddr = reinterpret_cast<uintptr_t>(ext_info_);
127   }
128 
129   // Memcpy AicpuParamHead
130   auto rt_ret = rtMemcpy(args_, sizeof(aicpu::AicpuParamHead), reinterpret_cast<void *>(&aicpu_param_head),
131                          sizeof(aicpu::AicpuParamHead), RT_MEMCPY_HOST_TO_DEVICE);
132   if (rt_ret != RT_ERROR_NONE) {
133     MS_LOG(EXCEPTION) << "Call rt api rtMemcpy failed, ret: " << rt_ret;
134   }
135 }
136 
SetInputOutputAddrs(const std::vector<void * > & io_addrs,uint32_t io_addr_offset)137 void AicpuTask::SetInputOutputAddrs(const std::vector<void *> &io_addrs, uint32_t io_addr_offset) {
138   // Memcpy io addrs
139   if (!io_addrs.empty()) {
140     auto rt_ret = rtMemcpy(reinterpret_cast<void *>(reinterpret_cast<uint8_t *>(args_) + io_addr_offset),
141                            static_cast<uint32_t>(io_addrs.size()) * sizeof(void *), io_addrs.data(),
142                            static_cast<uint32_t>(io_addrs.size()) * sizeof(void *), RT_MEMCPY_HOST_TO_DEVICE);
143     if (rt_ret != RT_ERROR_NONE) {
144       MS_LOG(EXCEPTION) << "Call rt api rtMemcpy failed, ret: " << rt_ret;
145     }
146   }
147 }
148 
SetNodeDef(uint32_t node_def_len_offset,uint32_t node_def_addr_offset)149 void AicpuTask::SetNodeDef(uint32_t node_def_len_offset, uint32_t node_def_addr_offset) {
150   // Memcpy node def
151   auto size = task_info_->node_def().size();
152   auto rt_ret =
153     rtMemcpy(reinterpret_cast<void *>(reinterpret_cast<uint8_t *>(args_) + node_def_len_offset), sizeof(uint32_t),
154              reinterpret_cast<const void *>(&size), sizeof(uint32_t), RT_MEMCPY_HOST_TO_DEVICE);
155   if (rt_ret != RT_ERROR_NONE) {
156     MS_LOG(EXCEPTION) << "Call rt api rtMemcpy failed, ret: " << rt_ret;
157   }
158 
159   // Memcpy node def
160   rt_ret = rtMemcpy(reinterpret_cast<void *>(reinterpret_cast<uint8_t *>(args_) + node_def_addr_offset),
161                     task_info_->node_def().size(), reinterpret_cast<const void *>(task_info_->node_def().data()),
162                     task_info_->node_def().size(), RT_MEMCPY_HOST_TO_DEVICE);
163   if (rt_ret != RT_ERROR_NONE) {
164     MS_LOG(EXCEPTION) << "Call rt api rtMemcpy failed, ret: " << rt_ret;
165   }
166 }
167 
// NOTE(review): presumably registers AicpuTask as the task class created for task infos of
// type TaskInfoType::AICPU. REGISTER_TASK is not defined in this file (likely task_factory.h) — confirm there.
168 REGISTER_TASK(TaskInfoType::AICPU, AicpuTask, AicpuTaskInfo);
169 }  // namespace mindspore::ge::model_runner
170