/**
 * Copyright 2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/litert/delegate/nnapi/nnapi_delegate.h"
#include <queue>
#include <algorithm>
#include <string>
#include <vector>
#include <memory>
#include "include/errorcode.h"
#include "src/litert/delegate/delegate_utils.h"
#include "src/litert/delegate/nnapi/nnapi_utils.h"
#include "src/litert/delegate/nnapi/op/activation_nnapi.h"
#include "src/litert/delegate/nnapi/op/arithmetic_nnapi.h"
#include "src/litert/delegate/nnapi/op/cast_nnapi.h"
#include "src/litert/delegate/nnapi/op/common_nnapi.h"  // declares NNAPICommon, used below
#include "src/litert/delegate/nnapi/op/concat_nnapi.h"
#include "src/litert/delegate/nnapi/op/conv_nnapi.h"
#include "src/litert/delegate/nnapi/op/conv_transpose_nnapi.h"
#include "src/litert/delegate/nnapi/op/full_connection_nnapi.h"
#include "src/litert/delegate/nnapi/op/gather_nnapi.h"
#include "src/litert/delegate/nnapi/op/instance_norm_nnapi.h"
#include "src/litert/delegate/nnapi/op/padding_nnapi.h"
#include "src/litert/delegate/nnapi/op/pooling_nnapi.h"
#include "src/litert/delegate/nnapi/op/reshape_nnapi.h"
#include "src/litert/delegate/nnapi/op/resize_nnapi.h"
#include "src/litert/delegate/nnapi/op/reduce_nnapi.h"
#include "src/litert/delegate/nnapi/op/scale_nnapi.h"
#include "src/litert/delegate/nnapi/op/split_nnapi.h"
#include "src/litert/delegate/nnapi/op/stack_nnapi.h"
#include "src/litert/delegate/nnapi/op/softmax_nnapi.h"
#include "src/litert/delegate/nnapi/op/strided_slice_nnapi.h"
#include "src/litert/delegate/nnapi/op/transpose_nnapi.h"
#include "src/litert/delegate/nnapi/op/topk_nnapi.h"
#include "nnacl/op_base.h"

namespace mindspore {
namespace lite {
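// Collect the NNAPI devices to run on: a device is selected when its name is
// listed in specified_devices; when no names are given, only_use_acc_device
// and disable_cpu_device select devices by type instead.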
void GetSpecifiedDevices(const std::vector<std::string> &specified_devices, bool only_use_acc_device,
                         bool disable_cpu_device, std::vector<ANeuralNetworksDevice *> *devices) {
  MS_CHECK_TRUE_RET_VOID(devices != nullptr);
  uint32_t device_count = 0;
  nnapi_->ANeuralNetworks_getDeviceCount(&device_count);
  int32_t type;
  const char *name;
  ANeuralNetworksDevice *device;
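  // Priority: an explicit name match always wins; the type-based filters below
  // only apply when no device names were specified.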
  for (uint32_t idx = 0; idx < device_count; idx++) {
    nnapi_->ANeuralNetworks_getDevice(idx, &device);
    nnapi_->ANeuralNetworksDevice_getName(device, &name);
    nnapi_->ANeuralNetworksDevice_getType(device, &type);
    MS_LOG(DEBUG) << "Found available device: " << name << ", type: " << type;
    if (std::find(specified_devices.begin(), specified_devices.end(), name) != specified_devices.end()) {
      devices->push_back(device);
      continue;
    }
    if (specified_devices.empty() && only_use_acc_device && type == ANEURALNETWORKS_DEVICE_ACCELERATOR) {
      devices->push_back(device);
      continue;
    }
    if (specified_devices.empty() && disable_cpu_device && type != ANEURALNETWORKS_DEVICE_CPU) {
      devices->push_back(device);
    }
  }
}
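// Check that the NNAPI shared library is usable, enumerate the target devices
// (the device query APIs exist from NNAPI feature level 3 onwards), and
// register the op converter factories.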
Status NNAPIDelegate::Init() {
  if (nnapi_ == nullptr || !nnapi_->nnapi_exists) {
    MS_LOG(ERROR) << "NNAPI is not available.";
    return mindspore::kLiteNullptr;
  }
  if (nnapi_->android_sdk_version >= ANEURALNETWORKS_FEATURE_LEVEL_3) {
    GetSpecifiedDevices(specified_devices_, only_use_acc_device_, disable_cpu_device_, &devices_);
  }
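  // Map each supported primitive type to the factory that creates its NNAPI
  // op wrapper; primitives missing from this table are left to the CPU.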
  op_func_lists_ = {
    {schema::PrimitiveType_Activation, GetNNAPIOp<NNAPIActivation>},
    {schema::PrimitiveType_AddFusion, GetNNAPIOp<NNAPIArithmetic>},
    {schema::PrimitiveType_SubFusion, GetNNAPIOp<NNAPIArithmetic>},
    {schema::PrimitiveType_MulFusion, GetNNAPIOp<NNAPIArithmetic>},
    {schema::PrimitiveType_DivFusion, GetNNAPIOp<NNAPIArithmetic>},
    {schema::PrimitiveType_Cast, GetNNAPIOp<NNAPICast>},
    {schema::PrimitiveType_Concat, GetNNAPIOp<NNAPIConcat>},
    {schema::PrimitiveType_Conv2DFusion, GetNNAPIOp<NNAPIConv>},
    {schema::PrimitiveType_Conv2dTransposeFusion, GetNNAPIOp<NNAPIConvTranspose>},
    {schema::PrimitiveType_Equal, GetNNAPIOp<NNAPICommon>},
    {schema::PrimitiveType_ExpFusion, GetNNAPIOp<NNAPICommon>},
    {schema::PrimitiveType_Floor, GetNNAPIOp<NNAPICommon>},
    {schema::PrimitiveType_FullConnection, GetNNAPIOp<NNAPIFullConnection>},
    {schema::PrimitiveType_Gather, GetNNAPIOp<NNAPIGather>},
    {schema::PrimitiveType_InstanceNorm, GetNNAPIOp<NNAPIInstanceNorm>},
    {schema::PrimitiveType_AvgPoolFusion, GetNNAPIOp<NNAPIPooling>},
    {schema::PrimitiveType_MaxPoolFusion, GetNNAPIOp<NNAPIPooling>},
    {schema::PrimitiveType_PadFusion, GetNNAPIOp<NNAPIPadding>},
    {schema::PrimitiveType_Reshape, GetNNAPIOp<NNAPIReshape>},
    {schema::PrimitiveType_Resize, GetNNAPIOp<NNAPIResize>},
    {schema::PrimitiveType_ReduceFusion, GetNNAPIOp<NNAPIReduce>},
    {schema::PrimitiveType_Rsqrt, GetNNAPIOp<NNAPICommon>},
    {schema::PrimitiveType_Softmax, GetNNAPIOp<NNAPISoftmax>},
    {schema::PrimitiveType_Split, GetNNAPIOp<NNAPISplit>},
    {schema::PrimitiveType_Stack, GetNNAPIOp<NNAPIStack>},
    {schema::PrimitiveType_Transpose, GetNNAPIOp<NNAPITranspose>},
    {schema::PrimitiveType_ScaleFusion, GetNNAPIOp<NNAPIScale>},
    {schema::PrimitiveType_StridedSlice, GetNNAPIOp<NNAPIStridedSlice>},
    {schema::PrimitiveType_TopKFusion, GetNNAPIOp<NNAPITopk>},
  };
  return mindspore::kSuccess;
}
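// Partition the model for NNAPI: wrap supported kernels into NNAPIOps, fuse
// them into NNAPISubGraph kernels, keep unsupported kernels on the CPU, and
// store a topologically valid execution order in sorted_kernels_.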
Status NNAPIDelegate::Build(DelegateModel<schema::Primitive> *model) {
  MS_CHECK_TRUE_RET(model != nullptr, mindspore::kLiteNullptr);
  std::vector<NNAPIOp *> candidate_ops;
  auto begin_iter = model->BeginKernelIterator();
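  // Convert every kernel whose primitive type has a registered factory into an
  // NNAPIOp; kernels without one are recorded as remaining CPU kernels.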
  for (auto iter = begin_iter; iter != model->EndKernelIterator(); iter++) {
    auto kernel = *iter;
    MS_CHECK_TRUE_RET(kernel != nullptr, mindspore::kLiteNullptr);
    auto primitive = model->GetPrimitive(kernel);
    MS_ASSERT(primitive != nullptr);
    auto prim_type = primitive->value_type();
    if (op_func_lists_.find(prim_type) == op_func_lists_.end()) {
      MS_LOG(WARNING) << "Unsupported primitive type for NNAPI: " << prim_type;
      remained_kernels_.push_back(kernel);
      continue;
    }
    auto get_op_func = op_func_lists_.at(prim_type);
    MS_CHECK_TRUE_RET(get_op_func != nullptr, mindspore::kLiteNullptr);
    auto nnapi_op = get_op_func(kernel->name(), primitive, kernel->inputs(), kernel->outputs(), kernel->quant_type());
    if (nnapi_op == nullptr) {
      MS_LOG(WARNING) << "Get NNAPI op failed for type " << prim_type;
      remained_kernels_.push_back(kernel);
      continue;
    }
    candidate_ops.push_back(nnapi_op);
  }
  if (candidate_ops.empty()) {
    return mindspore::kSuccess;
  }

  inputs_ = model->inputs();
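  // Before creating any NNAPI subgraph, emit the CPU kernels that are already
  // schedulable so that sorted_kernels_ stays in execution order.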
  std::vector<kernel::Kernel *> ready_kernels;
  auto ret = FindReadyKernels<kernel::Kernel>(&remained_kernels_, &ready_kernels);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "FindReadyKernels failed.";
    for (auto op : candidate_ops) {
      delete op;
    }
    return mindspore::kLiteError;
  }
  sorted_kernels_.insert(sorted_kernels_.end(), ready_kernels.begin(), ready_kernels.end());
  ready_kernels.clear();

  // For every candidate op, record its predecessor and successor ops.
  FindPreNextOps<NNAPIOp>(candidate_ops);
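  // Alternate between fusing ready NNAPI ops into a subgraph and emitting the
  // CPU kernels that become schedulable after each fusion.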
  while (!candidate_ops.empty()) {
    auto nnapi_kernel = CreateNNAPISubGraph(model, &candidate_ops);
    if (nnapi_kernel != nullptr) {
      sorted_kernels_.push_back(reinterpret_cast<kernel::Kernel *>(nnapi_kernel));
      nnapi_kernels_.push_back(nnapi_kernel);
    } else {
      MS_LOG(ERROR) << "Create NNAPI subgraph failed.";
      for (auto nnapi_op : candidate_ops) {
        delete nnapi_op;
      }
      return mindspore::kLiteError;
    }

    ret = FindReadyKernels<kernel::Kernel>(&remained_kernels_, &ready_kernels);
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "FindReadyKernels failed.";
      for (auto nnapi_op : candidate_ops) {
        delete nnapi_op;
      }
      return mindspore::kLiteError;
    }
    sorted_kernels_.insert(sorted_kernels_.end(), ready_kernels.begin(), ready_kernels.end());
    ready_kernels.clear();
  }
  if (!remained_kernels_.empty() || sorted_kernels_.empty()) {
    MS_LOG(ERROR) << "NNAPI delegate build failed.";
    return mindspore::kLiteError;
  }
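  // Every kernel has been placed; compile each NNAPI subgraph into an
  // executable NNAPI model.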
  for (auto nnapi_kernel : nnapi_kernels_) {
    ret = nnapi_kernel->CompileNNAPIModel();
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "Compile NNAPI model failed.";
      return mindspore::kLiteError;
    }
  }

  // Update the kernel list of the delegate model with the sorted kernels.
  ReplaceNodes(std::make_shared<LiteDelegateGraph>(*model));
  return mindspore::kSuccess;
}
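// Swap the delegate graph's node list for the delegate's sorted kernels
// (plain CPU kernels plus the fused NNAPI subgraph kernels).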
void NNAPIDelegate::ReplaceNodes(const std::shared_ptr<LiteDelegateGraph> &graph) {
  MS_ASSERT(graph != nullptr);
  auto nodes = graph->nodes();
  MS_CHECK_TRUE_RET_VOID(nodes != nullptr);
  nodes->assign(sorted_kernels_.begin(), sorted_kernels_.end());
}
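// Take a batch of ready ops out of candidate_ops, determine the subgraph's
// input and output tensors, and build an NNAPISubGraph kernel from them.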
NNAPISubGraph *NNAPIDelegate::CreateNNAPISubGraph(DelegateModel<schema::Primitive> *model,
                                                  std::vector<NNAPIOp *> *candidate_ops) {
  // Find the ready ops that belong in the same subgraph.
  std::vector<NNAPIOp *> chosen_ops;
  auto ret = FindReadyKernels<NNAPIOp>(candidate_ops, &chosen_ops);
  if (ret != RET_OK || chosen_ops.empty()) {
    MS_LOG(ERROR) << "Find ready NNAPI ops failed.";
    return nullptr;
  }
  // Find the inputs and outputs of the subgraph.
  auto inputs = GetGraphInTensors<NNAPIOp>(chosen_ops, nullptr);
  if (inputs.empty()) {
    MS_LOG(ERROR) << "Find inputs of subgraph failed.";
    return nullptr;
  }
  auto outputs = GetGraphOutTensors<NNAPIOp>(chosen_ops);
  // An op output consumed by a kernel left outside the subgraph must also be
  // exposed as a subgraph output.
  for (auto nnapi_op : chosen_ops) {
    for (auto kernel : remained_kernels_) {
      std::for_each(kernel->inputs().begin(), kernel->inputs().end(), [&nnapi_op, &outputs](const MSTensor &tensor) {
        if (std::find(outputs.begin(), outputs.end(), tensor) == outputs.end() &&
            std::find(nnapi_op->outputs().begin(), nnapi_op->outputs().end(), tensor) != nnapi_op->outputs().end()) {
          outputs.push_back(tensor);
        }
      });
    }
  }
  if (outputs.empty()) {
    MS_LOG(ERROR) << "Find outputs of subgraph failed.";
    return nullptr;
  }
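  // Create and initialize the subgraph kernel; release it and report failure
  // to the caller if any step fails.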
  auto nnapi_kernel = new (std::nothrow) NNAPISubGraph(chosen_ops, inputs, outputs, devices_, relax_fp32_to_fp16_);
  if (nnapi_kernel == nullptr) {
    MS_LOG(ERROR) << "new NNAPI subgraph kernel failed.";
    return nullptr;
  }
  ret = nnapi_kernel->Init();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Init NNAPI model failed.";
    delete nnapi_kernel;
    return nullptr;
  }
  ret = nnapi_kernel->CreateNNAPIModel();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Create NNAPI model failed.";
    delete nnapi_kernel;
    return nullptr;
  }
  return nnapi_kernel;
}
}  // namespace lite
}  // namespace mindspore