// (removed code-browser navigation chrome — "Home / Line# / Scopes# / Navigate / Raw / Download" — that leaked into this file during extraction)
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
#include "backend/kernel_compiler/aicpu/aicpu_kernel_mod.h"

#include <algorithm>
#include <cstdint>
#include <memory>
#include <string>
#include <vector>

#include "runtime/mem.h"
#include "runtime/rt.h"
#include "utils/convert_utils.h"
#include "backend/kernel_compiler/aicpu/aicpu_util.h"
#include "utils/ms_context.h"
#include "runtime/device/ascend/executor/ai_cpu_dynamic_kernel.h"
#include "runtime/device/kernel_runtime.h"
#include "runtime/device/ascend/executor/host_dynamic_kernel.h"

using AicpuTaskInfoPtr = std::shared_ptr<mindspore::ge::model_runner::AicpuTaskInfo>;
using AicpuDynamicKernel = mindspore::device::ascend::AiCpuDynamicKernel;
using HostDynamicKernel = mindspore::device::ascend::HostDynamicKernel;

namespace mindspore {
namespace kernel {
// Shared object hosting the built-in AICPU kernels.
constexpr auto AICPU_OPS_SO_NAME = "libaicpu_kernels.so";
// Shared object used for ops in kCustAiCpuKernelOps (customized AICPU kernels,
// launched through the kCustRunApi entry — see the dispatch logic below).
constexpr auto CUST_AICPU_OPS_SO_NAME = "libcpu_kernels.so";

AicpuOpKernelMod()42 AicpuOpKernelMod::AicpuOpKernelMod() : anf_node_(nullptr) {}
43 
~AicpuOpKernelMod()44 AicpuOpKernelMod::~AicpuOpKernelMod() {
45   args_.clear();
46   inputList_.clear();
47   outputList_.clear();
48   anf_node_ = nullptr;
49   input_size_list_.clear();
50   output_size_list_.clear();
51   workspace_size_list_.clear();
52   ext_info_.clear();
53 }
54 
SetInputSizeList(const std::vector<size_t> & size_list)55 void AicpuOpKernelMod::SetInputSizeList(const std::vector<size_t> &size_list) { input_size_list_ = size_list; }
GetInputSizeList() const56 const std::vector<size_t> &AicpuOpKernelMod::GetInputSizeList() const { return input_size_list_; }
SetOutputSizeList(const std::vector<size_t> & size_list)57 void AicpuOpKernelMod::SetOutputSizeList(const std::vector<size_t> &size_list) { output_size_list_ = size_list; }
GetOutputSizeList() const58 const std::vector<size_t> &AicpuOpKernelMod::GetOutputSizeList() const { return output_size_list_; }
SetWorkspaceSizeList(const std::vector<size_t> & size_list)59 void AicpuOpKernelMod::SetWorkspaceSizeList(const std::vector<size_t> &size_list) { workspace_size_list_ = size_list; }
GetWorkspaceSizeList() const60 const std::vector<size_t> &AicpuOpKernelMod::GetWorkspaceSizeList() const { return workspace_size_list_; }
SetInputList(const std::vector<int64_t> & inputList)61 void AicpuOpKernelMod::SetInputList(const std::vector<int64_t> &inputList) { inputList_ = inputList; }
SetOutputList(const std::vector<int64_t> & outputList)62 void AicpuOpKernelMod::SetOutputList(const std::vector<int64_t> &outputList) { outputList_ = outputList; }
SetNodeDef(const std::string & nodeDef)63 void AicpuOpKernelMod::SetNodeDef(const std::string &nodeDef) { (void)node_def_str_.assign(nodeDef); }
SetExtInfo(const std::string & ext_info)64 void AicpuOpKernelMod::SetExtInfo(const std::string &ext_info) { ext_info_ = ext_info; }
SetNodeName(const std::string & node_name)65 void AicpuOpKernelMod::SetNodeName(const std::string &node_name) { node_name_ = node_name; }
SetAnfNode(const mindspore::AnfNodePtr & anf_node)66 void AicpuOpKernelMod::SetAnfNode(const mindspore::AnfNodePtr &anf_node) {
67   MS_EXCEPTION_IF_NULL(anf_node);
68   anf_node_ = anf_node;
69 }
70 
CreateCpuKernelInfo(const std::vector<AddressPtr> & inputs,const std::vector<AddressPtr> & outputs)71 void AicpuOpKernelMod::CreateCpuKernelInfo(const std::vector<AddressPtr> &inputs,
72                                            const std::vector<AddressPtr> &outputs) {
73   MS_LOG(INFO) << "CreateCpuKernelInfoOffline start";
74 
75   if (kCustAiCpuKernelOps.find(node_name_) != kCustAiCpuKernelOps.end()) {
76     node_so_ = CUST_AICPU_OPS_SO_NAME;
77     node_name_ = kCustRunApi;
78   } else if (kCacheKernelOps.find(node_name_) != kCacheKernelOps.end()) {
79     node_so_ = AICPU_OPS_SO_NAME;
80     node_name_ = kCustRunApi;
81   } else {
82     if (node_so_ != CUST_AICPU_OPS_SO_NAME) {
83       node_so_ = AICPU_OPS_SO_NAME;
84     }
85   }
86   // InputOutputAddr
87   vector<void *> io_addrs;
88   (void)std::transform(std::begin(inputs), std::end(inputs), std::back_inserter(io_addrs),
89                        [](const AddressPtr &input) -> void * { return input->addr; });
90   (void)std::transform(std::begin(outputs), std::end(outputs), std::back_inserter(io_addrs),
91                        [](const AddressPtr &output) -> void * { return output->addr; });
92 
93   auto io_addrs_num = io_addrs.size();
94   // calculate paramLen: AicpuParamHead.len + ioAddrsSize + notifyId.len + customizedAttr.len
95   auto param_len = sizeof(AicpuParamHead);
96 
97   // get input and output addrs size, no need to check overflow
98   auto io_addrs_size = io_addrs_num * sizeof(uint64_t);
99   // refresh paramLen, no need to check overflow
100   param_len += io_addrs_size;
101 
102   auto node_def_len = node_def_str_.length();
103   param_len += node_def_len;
104   param_len += sizeof(uint32_t);
105 
106   AicpuParamHead aicpu_param_head{};
107   aicpu_param_head.length = param_len;
108   aicpu_param_head.ioAddrNum = io_addrs_num;
109 
110   if (ext_info_.empty()) {
111     MS_LOG(INFO) << "Static Shape Kernel";
112     aicpu_param_head.extInfoLength = 0;
113     aicpu_param_head.extInfoAddr = 0;
114   } else {
115     MS_LOG(INFO) << "Dynamic Kernel Ext Info size:" << ext_info_.size();
116   }
117 
118   args_.clear();
119   (void)args_.append(reinterpret_cast<const char *>(&aicpu_param_head), sizeof(AicpuParamHead));
120   // TaskArgs append ioAddrs
121   if (io_addrs_size != 0) {
122     (void)args_.append(reinterpret_cast<const char *>(io_addrs.data()), io_addrs_size);
123   }
124 
125   // size for node_def
126   args_.append(reinterpret_cast<const char *>(&node_def_len), sizeof(uint32_t));
127 
128   // When it's aicpu customized ops, taskArgs should append customized attr
129   if (node_def_len != 0) {
130     (void)args_.append(reinterpret_cast<const char *>(node_def_str_.data()), node_def_len);
131   }
132 
133   MS_LOG(INFO) << "CreateCpuKernelInfoOffline end";
134 }
135 
Launch(const std::vector<AddressPtr> & inputs,const std::vector<AddressPtr> &,const std::vector<AddressPtr> & outputs,void * stream_ptr)136 bool AicpuOpKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
137                               const std::vector<AddressPtr> &outputs, void *stream_ptr) {
138   if (stream_ptr == nullptr) {
139     MS_LOG(ERROR) << "stream_ptr should not be nullptr.";
140     return false;
141   }
142   if (stream_ == nullptr) {
143     stream_ = stream_ptr;
144   }
145   CreateCpuKernelInfo(inputs, outputs);
146   if (node_name_ == kTopK) {
147     node_name_ = kTopKV2;
148   }
149   if (node_name_ == kStack) {
150     node_name_ = kPack;
151   }
152   MS_LOG(INFO) << "Aicpu launch, node_so_:" << node_so_ << ", node name:" << node_name_
153                << ", args_size:" << args_.length();
154   if (rtCpuKernelLaunch(reinterpret_cast<const void *>(node_so_.c_str()),
155                         reinterpret_cast<const void *>(node_name_.c_str()), 1,
156                         reinterpret_cast<const void *>(args_.data()), static_cast<uint32_t>(args_.length()), nullptr,
157                         stream_) != RT_ERROR_NONE) {
158     MS_LOG(ERROR) << "Aicpu op launch failed!";
159 
160     return false;
161   }
162   return true;
163 }
164 
GenTask(const std::vector<AddressPtr> & inputs,const std::vector<AddressPtr> &,const std::vector<AddressPtr> & outputs,uint32_t stream_id)165 std::vector<TaskInfoPtr> AicpuOpKernelMod::GenTask(const std::vector<AddressPtr> &inputs,
166                                                    const std::vector<AddressPtr> &,
167                                                    const std::vector<AddressPtr> &outputs, uint32_t stream_id) {
168   MS_LOG(INFO) << "AicpuOpKernelMod GenTask start";
169 
170   stream_id_ = stream_id;
171   if (kCustAiCpuKernelOps.find(node_name_) != kCustAiCpuKernelOps.end()) {
172     node_so_ = CUST_AICPU_OPS_SO_NAME;
173     node_name_ = kCustRunApi;
174   } else if (kCacheKernelOps.find(node_name_) != kCacheKernelOps.end()) {
175     node_so_ = AICPU_OPS_SO_NAME;
176     node_name_ = kCustRunApi;
177   } else {
178     if (node_so_ != CUST_AICPU_OPS_SO_NAME) {
179       node_so_ = AICPU_OPS_SO_NAME;
180     }
181   }
182   std::vector<void *> input_data_addrs;
183   (void)std::transform(std::begin(inputs), std::end(inputs), std::back_inserter(input_data_addrs),
184                        [](const AddressPtr &input) -> void * { return input->addr; });
185 
186   std::vector<void *> output_data_addrs;
187   (void)std::transform(std::begin(outputs), std::end(outputs), std::back_inserter(output_data_addrs),
188                        [](const AddressPtr &output) -> void * { return output->addr; });
189 
190   if (node_name_ == kTopK) {
191     node_name_ = kTopKV2;
192   }
193 
194   if (node_name_ == kStack) {
195     node_name_ = kPack;
196   }
197 
198   AicpuTaskInfoPtr task_info_ptr = std::make_shared<mindspore::ge::model_runner::AicpuTaskInfo>(
199     unique_name_, stream_id, node_so_, node_name_, node_def_str_, ext_info_, input_data_addrs, output_data_addrs,
200     NeedDump());
201 
202   MS_LOG(INFO) << "AicpuOpKernelMod GenTask end";
203   return {task_info_ptr};
204 }
205 
GenDynamicKernel(const CNodePtr & cnode_ptr,void * stream_ptr)206 device::DynamicKernelPtr AicpuOpKernelMod::GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) {
207   AddressPtrList kernel_inputs;
208   AddressPtrList kernel_workspaces;
209   AddressPtrList kernel_outputs;
210   device::KernelRuntime::GenLaunchArgs(*this, cnode_ptr, &kernel_inputs, &kernel_workspaces, &kernel_outputs);
211 
212   CreateCpuKernelInfo(kernel_inputs, kernel_outputs);
213   return std::make_shared<AicpuDynamicKernel>(stream_ptr, cnode_ptr, args_, ext_info_, node_so_, node_name_);
214 }
}  // namespace kernel
}  // namespace mindspore