1 /**
2 * Copyright 2020 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "backend/kernel_compiler/aicpu/aicpu_kernel_mod.h"
18
19 #include <memory>
20 #include <vector>
21 #include <string>
22 #include <algorithm>
23
24 #include "runtime/mem.h"
25 #include "runtime/rt.h"
26 #include "utils/convert_utils.h"
27 #include "backend/kernel_compiler/aicpu/aicpu_util.h"
28 #include "utils/ms_context.h"
29 #include "runtime/device/ascend/executor/ai_cpu_dynamic_kernel.h"
30 #include "runtime/device/kernel_runtime.h"
31 #include "runtime/device/ascend/executor/host_dynamic_kernel.h"
32
33 using AicpuTaskInfoPtr = std::shared_ptr<mindspore::ge::model_runner::AicpuTaskInfo>;
34 using AicpuDynamicKernel = mindspore::device::ascend::AiCpuDynamicKernel;
35 using HostDynamicKernel = mindspore::device::ascend::HostDynamicKernel;
36
37 namespace mindspore {
38 namespace kernel {
// Shared library containing the built-in AICPU kernel implementations.
constexpr auto AICPU_OPS_SO_NAME = "libaicpu_kernels.so";
// Shared library used for customized AICPU kernels (ops listed in kCustAiCpuKernelOps).
constexpr auto CUST_AICPU_OPS_SO_NAME = "libcpu_kernels.so";
41
AicpuOpKernelMod()42 AicpuOpKernelMod::AicpuOpKernelMod() : anf_node_(nullptr) {}
43
~AicpuOpKernelMod()44 AicpuOpKernelMod::~AicpuOpKernelMod() {
45 args_.clear();
46 inputList_.clear();
47 outputList_.clear();
48 anf_node_ = nullptr;
49 input_size_list_.clear();
50 output_size_list_.clear();
51 workspace_size_list_.clear();
52 ext_info_.clear();
53 }
54
SetInputSizeList(const std::vector<size_t> & size_list)55 void AicpuOpKernelMod::SetInputSizeList(const std::vector<size_t> &size_list) { input_size_list_ = size_list; }
GetInputSizeList() const56 const std::vector<size_t> &AicpuOpKernelMod::GetInputSizeList() const { return input_size_list_; }
SetOutputSizeList(const std::vector<size_t> & size_list)57 void AicpuOpKernelMod::SetOutputSizeList(const std::vector<size_t> &size_list) { output_size_list_ = size_list; }
GetOutputSizeList() const58 const std::vector<size_t> &AicpuOpKernelMod::GetOutputSizeList() const { return output_size_list_; }
SetWorkspaceSizeList(const std::vector<size_t> & size_list)59 void AicpuOpKernelMod::SetWorkspaceSizeList(const std::vector<size_t> &size_list) { workspace_size_list_ = size_list; }
GetWorkspaceSizeList() const60 const std::vector<size_t> &AicpuOpKernelMod::GetWorkspaceSizeList() const { return workspace_size_list_; }
SetInputList(const std::vector<int64_t> & inputList)61 void AicpuOpKernelMod::SetInputList(const std::vector<int64_t> &inputList) { inputList_ = inputList; }
SetOutputList(const std::vector<int64_t> & outputList)62 void AicpuOpKernelMod::SetOutputList(const std::vector<int64_t> &outputList) { outputList_ = outputList; }
SetNodeDef(const std::string & nodeDef)63 void AicpuOpKernelMod::SetNodeDef(const std::string &nodeDef) { (void)node_def_str_.assign(nodeDef); }
SetExtInfo(const std::string & ext_info)64 void AicpuOpKernelMod::SetExtInfo(const std::string &ext_info) { ext_info_ = ext_info; }
SetNodeName(const std::string & node_name)65 void AicpuOpKernelMod::SetNodeName(const std::string &node_name) { node_name_ = node_name; }
SetAnfNode(const mindspore::AnfNodePtr & anf_node)66 void AicpuOpKernelMod::SetAnfNode(const mindspore::AnfNodePtr &anf_node) {
67 MS_EXCEPTION_IF_NULL(anf_node);
68 anf_node_ = anf_node;
69 }
70
CreateCpuKernelInfo(const std::vector<AddressPtr> & inputs,const std::vector<AddressPtr> & outputs)71 void AicpuOpKernelMod::CreateCpuKernelInfo(const std::vector<AddressPtr> &inputs,
72 const std::vector<AddressPtr> &outputs) {
73 MS_LOG(INFO) << "CreateCpuKernelInfoOffline start";
74
75 if (kCustAiCpuKernelOps.find(node_name_) != kCustAiCpuKernelOps.end()) {
76 node_so_ = CUST_AICPU_OPS_SO_NAME;
77 node_name_ = kCustRunApi;
78 } else if (kCacheKernelOps.find(node_name_) != kCacheKernelOps.end()) {
79 node_so_ = AICPU_OPS_SO_NAME;
80 node_name_ = kCustRunApi;
81 } else {
82 if (node_so_ != CUST_AICPU_OPS_SO_NAME) {
83 node_so_ = AICPU_OPS_SO_NAME;
84 }
85 }
86 // InputOutputAddr
87 vector<void *> io_addrs;
88 (void)std::transform(std::begin(inputs), std::end(inputs), std::back_inserter(io_addrs),
89 [](const AddressPtr &input) -> void * { return input->addr; });
90 (void)std::transform(std::begin(outputs), std::end(outputs), std::back_inserter(io_addrs),
91 [](const AddressPtr &output) -> void * { return output->addr; });
92
93 auto io_addrs_num = io_addrs.size();
94 // calculate paramLen: AicpuParamHead.len + ioAddrsSize + notifyId.len + customizedAttr.len
95 auto param_len = sizeof(AicpuParamHead);
96
97 // get input and output addrs size, no need to check overflow
98 auto io_addrs_size = io_addrs_num * sizeof(uint64_t);
99 // refresh paramLen, no need to check overflow
100 param_len += io_addrs_size;
101
102 auto node_def_len = node_def_str_.length();
103 param_len += node_def_len;
104 param_len += sizeof(uint32_t);
105
106 AicpuParamHead aicpu_param_head{};
107 aicpu_param_head.length = param_len;
108 aicpu_param_head.ioAddrNum = io_addrs_num;
109
110 if (ext_info_.empty()) {
111 MS_LOG(INFO) << "Static Shape Kernel";
112 aicpu_param_head.extInfoLength = 0;
113 aicpu_param_head.extInfoAddr = 0;
114 } else {
115 MS_LOG(INFO) << "Dynamic Kernel Ext Info size:" << ext_info_.size();
116 }
117
118 args_.clear();
119 (void)args_.append(reinterpret_cast<const char *>(&aicpu_param_head), sizeof(AicpuParamHead));
120 // TaskArgs append ioAddrs
121 if (io_addrs_size != 0) {
122 (void)args_.append(reinterpret_cast<const char *>(io_addrs.data()), io_addrs_size);
123 }
124
125 // size for node_def
126 args_.append(reinterpret_cast<const char *>(&node_def_len), sizeof(uint32_t));
127
128 // When it's aicpu customized ops, taskArgs should append customized attr
129 if (node_def_len != 0) {
130 (void)args_.append(reinterpret_cast<const char *>(node_def_str_.data()), node_def_len);
131 }
132
133 MS_LOG(INFO) << "CreateCpuKernelInfoOffline end";
134 }
135
Launch(const std::vector<AddressPtr> & inputs,const std::vector<AddressPtr> &,const std::vector<AddressPtr> & outputs,void * stream_ptr)136 bool AicpuOpKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
137 const std::vector<AddressPtr> &outputs, void *stream_ptr) {
138 if (stream_ptr == nullptr) {
139 MS_LOG(ERROR) << "stream_ptr should not be nullptr.";
140 return false;
141 }
142 if (stream_ == nullptr) {
143 stream_ = stream_ptr;
144 }
145 CreateCpuKernelInfo(inputs, outputs);
146 if (node_name_ == kTopK) {
147 node_name_ = kTopKV2;
148 }
149 if (node_name_ == kStack) {
150 node_name_ = kPack;
151 }
152 MS_LOG(INFO) << "Aicpu launch, node_so_:" << node_so_ << ", node name:" << node_name_
153 << ", args_size:" << args_.length();
154 if (rtCpuKernelLaunch(reinterpret_cast<const void *>(node_so_.c_str()),
155 reinterpret_cast<const void *>(node_name_.c_str()), 1,
156 reinterpret_cast<const void *>(args_.data()), static_cast<uint32_t>(args_.length()), nullptr,
157 stream_) != RT_ERROR_NONE) {
158 MS_LOG(ERROR) << "Aicpu op launch failed!";
159
160 return false;
161 }
162 return true;
163 }
164
GenTask(const std::vector<AddressPtr> & inputs,const std::vector<AddressPtr> &,const std::vector<AddressPtr> & outputs,uint32_t stream_id)165 std::vector<TaskInfoPtr> AicpuOpKernelMod::GenTask(const std::vector<AddressPtr> &inputs,
166 const std::vector<AddressPtr> &,
167 const std::vector<AddressPtr> &outputs, uint32_t stream_id) {
168 MS_LOG(INFO) << "AicpuOpKernelMod GenTask start";
169
170 stream_id_ = stream_id;
171 if (kCustAiCpuKernelOps.find(node_name_) != kCustAiCpuKernelOps.end()) {
172 node_so_ = CUST_AICPU_OPS_SO_NAME;
173 node_name_ = kCustRunApi;
174 } else if (kCacheKernelOps.find(node_name_) != kCacheKernelOps.end()) {
175 node_so_ = AICPU_OPS_SO_NAME;
176 node_name_ = kCustRunApi;
177 } else {
178 if (node_so_ != CUST_AICPU_OPS_SO_NAME) {
179 node_so_ = AICPU_OPS_SO_NAME;
180 }
181 }
182 std::vector<void *> input_data_addrs;
183 (void)std::transform(std::begin(inputs), std::end(inputs), std::back_inserter(input_data_addrs),
184 [](const AddressPtr &input) -> void * { return input->addr; });
185
186 std::vector<void *> output_data_addrs;
187 (void)std::transform(std::begin(outputs), std::end(outputs), std::back_inserter(output_data_addrs),
188 [](const AddressPtr &output) -> void * { return output->addr; });
189
190 if (node_name_ == kTopK) {
191 node_name_ = kTopKV2;
192 }
193
194 if (node_name_ == kStack) {
195 node_name_ = kPack;
196 }
197
198 AicpuTaskInfoPtr task_info_ptr = std::make_shared<mindspore::ge::model_runner::AicpuTaskInfo>(
199 unique_name_, stream_id, node_so_, node_name_, node_def_str_, ext_info_, input_data_addrs, output_data_addrs,
200 NeedDump());
201
202 MS_LOG(INFO) << "AicpuOpKernelMod GenTask end";
203 return {task_info_ptr};
204 }
205
GenDynamicKernel(const CNodePtr & cnode_ptr,void * stream_ptr)206 device::DynamicKernelPtr AicpuOpKernelMod::GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) {
207 AddressPtrList kernel_inputs;
208 AddressPtrList kernel_workspaces;
209 AddressPtrList kernel_outputs;
210 device::KernelRuntime::GenLaunchArgs(*this, cnode_ptr, &kernel_inputs, &kernel_workspaces, &kernel_outputs);
211
212 CreateCpuKernelInfo(kernel_inputs, kernel_outputs);
213 return std::make_shared<AicpuDynamicKernel>(stream_ptr, cnode_ptr, args_, ext_info_, node_so_, node_name_);
214 }
215 } // namespace kernel
216 } // namespace mindspore
217