/**
 * Copyright 2021-2023 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/litert/delegate/npu/npu_delegate.h"
#include <queue>
#include "include/errorcode.h"
#include "src/common/prim_util.h"
#include "src/litert/delegate/npu/pass/npu_pass_utils.h"
#include "src/litert/delegate/npu/op/npu_op.h"
#include "src/litert/delegate/npu/op/activation_npu.h"
#include "src/litert/delegate/npu/op/argmax_npu.h"
#include "src/litert/delegate/npu/op/arithmetic_npu.h"
#include "src/litert/delegate/npu/op/arithmetic_self_npu.h"
#include "src/litert/delegate/npu/op/avg_pooling_npu.h"
#include "src/litert/delegate/npu/op/batchnorm_npu.h"
#include "src/litert/delegate/npu/op/cast_npu.h"
#include "src/litert/delegate/npu/op/concat_npu.h"
#include "src/litert/delegate/npu/op/convolution_npu.h"
#include "src/litert/delegate/npu/op/crop_and_resize_npu.h"
#include "src/litert/delegate/npu/op/deconvolution_npu.h"
#include "src/litert/delegate/npu/op/eltwise_npu.h"
#include "src/litert/delegate/npu/op/expand_dims_npu.h"
#include "src/litert/delegate/npu/op/fullconnection_npu.h"
#include "src/litert/delegate/npu/op/gather_npu.h"
#include "src/litert/delegate/npu/op/instance_norm_npu.h"
#include "src/litert/delegate/npu/op/matmul_npu.h"
#include "src/litert/delegate/npu/op/max_pooling_npu.h"
#include "src/litert/delegate/npu/op/pad_npu.h"
#include "src/litert/delegate/npu/op/reduce_npu.h"
#include "src/litert/delegate/npu/op/reshape_npu.h"
#include "src/litert/delegate/npu/op/resize_npu.h"
#include "src/litert/delegate/npu/op/scale_npu.h"
#include "src/litert/delegate/npu/op/slice_npu.h"
#include "src/litert/delegate/npu/op/softmax_npu.h"
#include "src/litert/delegate/npu/op/split_npu.h"
#include "src/litert/delegate/npu/op/squeeze_npu.h"
#include "src/litert/delegate/npu/op/strided_slice_npu.h"
#include "src/litert/delegate/npu/op/tile_npu.h"
#include "src/litert/delegate/npu/op/transpose_npu.h"
#include "src/litert/delegate/npu/op/unsqueeze_npu.h"
#include "src/litert/delegate/npu/op/abs_npu.h"
#include "src/litert/delegate/npu/op/flatten_npu.h"
#include "src/litert/delegate/npu/op/broadcast_to_npu.h"
#include "src/litert/delegate/npu/op/unpack_npu.h"
#include "src/litert/delegate/npu/npu_graph.h"
#include "src/litert/delegate/delegate_utils.h"
#include "src/litert/delegate/npu/pass/npu_transform_pass.h"
#include "src/litert/delegate/npu/pass/npu_insert_transform_pass.h"
#include "src/litert/delegate/npu/pass/npu_fusion_pass.h"

using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;

namespace mindspore::lite {
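// Release the resources owned by the delegate: the NPU manager and the pass manager.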
NPUDelegate::~NPUDelegate() {
  if (npu_manager_ != nullptr) {
    npu_manager_->Reset();
    delete npu_manager_;
    npu_manager_ = nullptr;
  }
  if (pass_manager_ != nullptr) {
    pass_manager_->Clear();
    delete pass_manager_;
    pass_manager_ = nullptr;
  }
}

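// Register the built-in NPU graph passes (transform, insert-transform and fusion) with the pass manager.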
Status NPUDelegate::AddPasses() {
  auto transform_pass = new (std::nothrow) NPUTransformPass();
  if (transform_pass == nullptr) {
    MS_LOG(ERROR) << "New NPUTransformPass failed.";
    return mindspore::kLiteNullptr;
  }
  pass_manager_->AddPass(transform_pass);

  auto insert_transform_pass = new (std::nothrow) NPUInsertTransformPass();
  if (insert_transform_pass == nullptr) {
    MS_LOG(ERROR) << "New NPUInsertTransformPass failed.";
    return mindspore::kLiteNullptr;
  }
  pass_manager_->AddPass(insert_transform_pass);

  auto fusion_pass = new (std::nothrow) NPUFusionPass();
  if (fusion_pass == nullptr) {
    MS_LOG(ERROR) << "New NPUFusionPass failed.";
    return mindspore::kLiteNullptr;
  }
  pass_manager_->AddPass(fusion_pass);
  return mindspore::kSuccess;
}

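// Create the NPU manager and pass manager, check that the device supports NPU, and register the builders of the supported ops.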
Status NPUDelegate::Init() {
  npu_manager_ = new (std::nothrow) NPUManager(frequency_);
  if (npu_manager_ == nullptr) {
    MS_LOG(ERROR) << "New npu manager failed.";
    return mindspore::kLiteNullptr;
  }
  if (!npu_manager_->IsSupportNPU()) {
    MS_LOG(DEBUG) << "NPU is not supported on this device.";
    delete npu_manager_;
    npu_manager_ = nullptr;
    return mindspore::kLiteNotSupport;
  }
  npu_manager_->SetGraphCacheDir(cache_dir_);

  pass_manager_ = new (std::nothrow) NPUPassManager();
  if (pass_manager_ == nullptr) {
    delete npu_manager_;
    npu_manager_ = nullptr;
    MS_LOG(ERROR) << "New npu pass manager failed.";
    return mindspore::kLiteNullptr;
  }

  auto ret = AddPasses();
  if (ret != mindspore::kSuccess) {
    MS_LOG(ERROR) << "add passes for npu pass manager failed.";
    return ret;
  }

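  // Map each supported primitive type to the factory that builds its NPU op.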
  op_func_lists_.clear();
  op_func_lists_ = {
    {schema::PrimitiveType_Activation, GetNPUOp<ActivationNPUOp>},
    {schema::PrimitiveType_ArgMaxFusion, GetNPUOp<ArgmaxNPUOp>},
    {schema::PrimitiveType_MulFusion, GetNPUOp<ArithmeticNPUOp>},
    {schema::PrimitiveType_AddFusion, GetNPUOp<ArithmeticNPUOp>},
    {schema::PrimitiveType_SubFusion, GetNPUOp<ArithmeticNPUOp>},
    {schema::PrimitiveType_DivFusion, GetNPUOp<ArithmeticNPUOp>},
    {schema::PrimitiveType_FloorMod, GetNPUOp<ArithmeticNPUOp>},
    {schema::PrimitiveType_FloorDiv, GetNPUOp<ArithmeticNPUOp>},
    {schema::PrimitiveType_LogicalAnd, GetNPUOp<ArithmeticNPUOp>},
    {schema::PrimitiveType_LogicalOr, GetNPUOp<ArithmeticNPUOp>},
    {schema::PrimitiveType_Maximum, GetNPUOp<ArithmeticNPUOp>},
    {schema::PrimitiveType_Minimum, GetNPUOp<ArithmeticNPUOp>},
    {schema::PrimitiveType_NotEqual, GetNPUOp<ArithmeticNPUOp>},
    {schema::PrimitiveType_Equal, GetNPUOp<ArithmeticNPUOp>},
    {schema::PrimitiveType_Less, GetNPUOp<ArithmeticNPUOp>},
    {schema::PrimitiveType_LessEqual, GetNPUOp<ArithmeticNPUOp>},
    {schema::PrimitiveType_Greater, GetNPUOp<ArithmeticNPUOp>},
    {schema::PrimitiveType_GreaterEqual, GetNPUOp<ArithmeticNPUOp>},
    {schema::PrimitiveType_Ceil, GetNPUOp<ArithmeticSelfNPUOp>},
    {schema::PrimitiveType_Cos, GetNPUOp<ArithmeticSelfNPUOp>},
    {schema::PrimitiveType_Floor, GetNPUOp<ArithmeticSelfNPUOp>},
    {schema::PrimitiveType_Log, GetNPUOp<ArithmeticSelfNPUOp>},
    {schema::PrimitiveType_LogicalNot, GetNPUOp<ArithmeticSelfNPUOp>},
    {schema::PrimitiveType_Neg, GetNPUOp<ArithmeticSelfNPUOp>},
    {schema::PrimitiveType_Reciprocal, GetNPUOp<ArithmeticSelfNPUOp>},
    {schema::PrimitiveType_Round, GetNPUOp<ArithmeticSelfNPUOp>},
    {schema::PrimitiveType_Rsqrt, GetNPUOp<ArithmeticSelfNPUOp>},
    {schema::PrimitiveType_Sin, GetNPUOp<ArithmeticSelfNPUOp>},
    {schema::PrimitiveType_Sqrt, GetNPUOp<ArithmeticSelfNPUOp>},
    {schema::PrimitiveType_Square, GetNPUOp<ArithmeticSelfNPUOp>},
    {schema::PrimitiveType_ExpFusion, GetNPUOp<ArithmeticSelfNPUOp>},
    {schema::PrimitiveType_AvgPoolFusion, GetNPUOp<AvgPoolingNPUOp>},
    {schema::PrimitiveType_MaxPoolFusion, GetNPUOp<MaxPoolingNPUOp>},
    {schema::PrimitiveType_FusedBatchNorm, GetNPUOp<BatchnormNPUOp>},
    {schema::PrimitiveType_Cast, GetNPUOp<CastNPUOp>},
    {schema::PrimitiveType_Concat, GetNPUOp<ConcatNPUOp>},
    {schema::PrimitiveType_Conv2dTransposeFusion, GetNPUOp<DeconvolutionNPUOp>},
    {schema::PrimitiveType_CropAndResize, GetNPUOp<CropAndResizeNPUOp>},
    {schema::PrimitiveType_Eltwise, GetNPUOp<EltwiseNPUOp>},
    {schema::PrimitiveType_ExpandDims, GetNPUOp<ExpandDimsNPUOp>},
    {schema::PrimitiveType_FullConnection, GetNPUOp<FullconnectionNPUOp>},
    {schema::PrimitiveType_Gather, GetNPUOp<GatherNPUOp>},
    {schema::PrimitiveType_InstanceNorm, GetNPUOp<InstanceNormNPUOp>},
    {schema::PrimitiveType_MatMulFusion, GetNPUOp<MatMulNPUOp>},
    {schema::PrimitiveType_PadFusion, GetNPUOp<PadNPUOp>},
    {schema::PrimitiveType_ReduceFusion, GetNPUOp<ReduceNPUOp>},
    {schema::PrimitiveType_Reshape, GetNPUOp<ReshapeNPUOp>},
    {schema::PrimitiveType_Resize, GetNPUOp<ResizeNPUOp>},
    {schema::PrimitiveType_ScaleFusion, GetNPUOp<ScaleNPUOp>},
    {schema::PrimitiveType_SliceFusion, GetNPUOp<SliceNPUOp>},
    {schema::PrimitiveType_Softmax, GetNPUOp<SoftmaxNPUOp>},
    {schema::PrimitiveType_Split, GetNPUOp<SplitNPUOp>},
    {schema::PrimitiveType_Squeeze, GetNPUOp<SqueezeNPUOp>},
    {schema::PrimitiveType_StridedSlice, GetNPUOp<StridedSliceNPUOp>},
    {schema::PrimitiveType_TileFusion, GetNPUOp<TileNPUOp>},
    {schema::PrimitiveType_Transpose, GetNPUOp<TransposeNPUOp>},
    {schema::PrimitiveType_Unsqueeze, GetNPUOp<UnsqueezeNPUOp>},
    {schema::PrimitiveType_Abs, GetNPUOp<AbsNPUOp>},
    {schema::PrimitiveType_Flatten, GetNPUOp<FlattenNPUOp>},
    {schema::PrimitiveType_BroadcastTo, GetNPUOp<BroadcastToNPUOp>},
    {schema::PrimitiveType_Unstack, GetNPUOp<UnpackNPUOp>},
  };
  return mindspore::kSuccess;
}

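// Partition the model: consecutive kernels supported by the NPU are grouped into NPUGraph kernels that replace them in the model, then the OM models are loaded through the NPU manager.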
Status NPUDelegate::Build(DelegateModel<schema::Primitive> *model) {
  KernelIter from;
  KernelIter end;
  std::vector<NPUOp *> npu_ops;
  int graph_index = 0;
  for (auto iter = model->BeginKernelIterator(); iter != model->EndKernelIterator(); iter++) {
    kernel::Kernel *kernel = *iter;
    auto npu_op = GetOP(kernel, model->GetPrimitive(kernel));
    if (npu_op != nullptr) {
      // A non-null npu_op means this kernel can be handled by the delegate.
      if (npu_ops.empty()) {
        from = iter;
      }
      npu_ops.push_back(npu_op);
      end = iter;
    } else {
      if (!npu_ops.empty()) {
        auto npu_graph_kernel = CreateNPUGraph(npu_ops, model, from, end);
        if (npu_graph_kernel == nullptr) {
          MS_LOG(ERROR) << "Create NPU Graph failed.";
          return mindspore::kLiteNullptr;
        }
        npu_graph_kernel->set_name("NpuGraph" + std::to_string(graph_index++));
        iter = model->Replace(from, end + 1, npu_graph_kernel);
        npu_ops.clear();
      }
    }
  }
  if (!npu_ops.empty()) {
    auto npu_graph_kernel = CreateNPUGraph(npu_ops, model, from, end);
    if (npu_graph_kernel == nullptr) {
      MS_LOG(ERROR) << "Create NPU Graph failed.";
      return mindspore::kLiteNullptr;
    }
    npu_graph_kernel->set_name("NpuGraph" + std::to_string(graph_index++));
    model->Replace(from, end + 1, npu_graph_kernel);
    npu_ops.clear();
  }
  auto ret = npu_manager_->LoadOMModel();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "NPU client load model failed.";
    return mindspore::kLiteError;
  }
  return mindspore::kSuccess;
}

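// Build the NPU op that matches the kernel's primitive type, or return nullptr if the type is unsupported. Float16 inputs without data and float16 outputs are marked as float32.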
NPUOp *NPUDelegate::GetOP(kernel::Kernel *kernel, const schema::Primitive *primitive) {
  if (primitive == nullptr) {
    MS_LOG(ERROR) << "primitive is NULL!";
    return nullptr;
  }
  if (kernel == nullptr) {
    MS_LOG(ERROR) << "kernel is NULL!";
    return nullptr;
  }
  auto name = kernel->name();
  NPUOp *npu_op = nullptr;
  auto node_type = primitive->value_type();
  if (node_type == schema::PrimitiveType_Conv2DFusion) {
    npu_op = GetNPUConvOp(primitive, kernel->inputs(), kernel->outputs(), name);
  } else if (node_type == schema::PrimitiveType_FullConnection) {
    npu_op = GetNPUFCOp(primitive, kernel->inputs(), kernel->outputs(), name);
  } else {
    if (op_func_lists_.find(node_type) != op_func_lists_.end()) {
      npu_op = op_func_lists_[node_type](primitive, kernel->inputs(), kernel->outputs(), name);
    } else {
      MS_LOG(DEBUG) << "Unsupported op type for NPU: " << node_type;
      return nullptr;
    }
  }
  if (npu_op == nullptr) {
    MS_LOG(DEBUG) << "Get NPU op failed, op name: " << name;
    return nullptr;
  }

  for (int i = 0; i < kernel->inputs().size(); i++) {
    mindspore::MSTensor tensor = kernel->inputs()[i];
    if (tensor.DataType() == DataType::kNumberTypeFloat16 && tensor.Data() == nullptr) {
      tensor.SetDataType(DataType::kNumberTypeFloat32);
    }
  }
  for (int i = 0; i < kernel->outputs().size(); i++) {
    mindspore::MSTensor tensor = kernel->outputs()[i];
    if (tensor.DataType() == DataType::kNumberTypeFloat16) {
      tensor.SetDataType(DataType::kNumberTypeFloat32);
    }
  }

  MS_LOG(DEBUG) << "kernel: [" << kernel->name().c_str() << "] op success. "
                << "op_type: " << PrimitiveCurVersionTypeName(kernel->type()) << ", "
                << "arch: " << kKirinNPU;
  return npu_op;
}

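// Wrap the collected ops into an NPUGraph kernel: resolve each op's predecessor and successor ops, run the registered passes, and initialize the subgraph and its transpose kernels.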
kernel::Kernel *NPUDelegate::CreateNPUGraph(const std::vector<NPUOp *> &ops, DelegateModel<schema::Primitive> *model,
                                            KernelIter from, KernelIter end) {
  auto in_tensors = GetGraphInTensors(ops, nullptr);
  auto out_tensors = GraphOutTensors<NPUOp>(ops, model, from, end);
  auto graph_kernel = new (std::nothrow) NPUGraph(ops, npu_manager_, in_tensors, out_tensors);
  if (graph_kernel == nullptr) {
    MS_LOG(DEBUG) << "New NPU Graph failed.";
    return nullptr;
  }
  // 1. For every op, find pre and next ops
  auto ret = graph_kernel->FindPreNextOps();
  if (ret != RET_OK) {
    delete graph_kernel;
    MS_LOG(DEBUG) << "NPU Graph find input and output ops for every op failed.";
    return nullptr;
  }
  // 2. Pass
  ret = pass_manager_->RunPass(graph_kernel);
  if (ret != RET_OK) {
    delete graph_kernel;
    MS_LOG(DEBUG) << "NPU Graph run pass failed. The passes mainly resolve format inconsistencies by "
                     "inserting transpose operators.";
    return nullptr;
  }
  // 3. NPUGraph init, create subgraph_kernel and transpose_kernel
  ret = graph_kernel->Init();
  if (ret != RET_OK) {
    delete graph_kernel;
    MS_LOG(DEBUG) << "NPU subgraph Init failed.";
    return nullptr;
  }
  return graph_kernel;
}
}  // namespace mindspore::lite