1 /**
2 * Copyright 2022 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <unordered_set>
18 #include <numeric>
19 #include "nnrt_delegate.h"
20 #include "checker/primitive_check.h"
21 #include "src/common/log_adapter.h"
22 #include "neural_network_runtime/neural_network_runtime.h"
23 #include "neural_network_runtime_inner.h"
24 #include "nnrt_model_kernel.h"
25 #include "schema/model_generated.h"
26 #include "schema/ops_generated.h"
27 #include "flatbuffers/flatbuffers.h"
28 #include "litert/tensor_category.h"
29
30 namespace mindspore {
31 namespace lite {
32 Status NNRTDelegate::Init() {
33 #ifdef SUPPORT_NNRT_METAGRAPH
34 auto ret = mindspore::lite::LoadHiaiFLibraryFromPath(&hiai_handle_);
35 if (!ret || hiai_handle_ == nullptr) {
36     MS_LOG(WARNING) << "Load HiAI_Foundation shared library failed.";
37 }
38 #endif
39 return kSuccess;
40 }
41
42 void NNRTDelegate::InitExtensionOptions() {
43 const auto &extensions = nnrt_device_info_.extensions_;
44 mindspore::lite::nnrt::ExtensionOptionsParser::Parse(extensions, &extension_options_);
45 }
46
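// Build() entry point: dequantize weight-quantized tensors first, then pick the Kirin NPU
// online/offline path (when metagraph support is compiled in) or fall back to the generic
// NNRT partitioning path in BuildNormalModel().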
47 Status NNRTDelegate::Build(DelegateModel<schema::Primitive> *model) {
48   // dequantize the LiteGraph's weight-quantized tensors
49 auto ret_dequant = DequantLiteGraph(lite_graph_);
50 if (ret_dequant != kSuccess) {
51 MS_LOG(ERROR) << "Dequant litegraph failed.";
52 return kLiteError;
53 }
54 #ifdef SUPPORT_NNRT_METAGRAPH
55 InitExtensionOptions();
56 if (IsKirinNPUWithOnlineInference()) {
57 MS_LOG(DEBUG) << "Choose to build online inference model";
58 return BuildKirinNPUModel(model);
59 }
60 if (IsKirinNPUWithOfflineInference()) {
61 MS_LOG(DEBUG) << "Choose to build offline inference model";
62 return BuildOfflineModel(model);
63 }
64 #endif
65
66 return BuildNormalModel(model);
67 }
68
69 bool NNRTDelegate::IsCustomModel() const {
70   // check if there is exactly one Custom kernel in the LiteModel.
71 if (lite_graph_ == nullptr) {
72 return false;
73 }
74 if (lite_graph_->all_nodes_.size() != 1) {
75 return false;
76 }
77 auto node = lite_graph_->all_nodes_[0];
78 if (node == nullptr) {
79 return false;
80 }
81 if (node->node_type_ != mindspore::schema::PrimitiveType_Custom) {
82 return false;
83 }
84 return true;
85 }
86
87 #ifdef SUPPORT_NNRT_METAGRAPH
88 bool NNRTDelegate::CheckNPUPrefix(const std::string prefix_name) const {
89 const std::string kirin_npu_name_prefix = prefix_name;
90 auto device_id = nnrt_device_info_.device_id_;
91 const char *device_name;
92 auto ret = OH_NNDevice_GetName(device_id, &device_name);
93 if (ret != OH_NN_SUCCESS) {
94 MS_LOG(WARNING) << "Get name of device: " << device_id << " failed, error: " << ret;
95 return false;
96 }
97
98 if (strncmp(kirin_npu_name_prefix.c_str(), device_name, kirin_npu_name_prefix.size()) != 0) {
99     MS_LOG(WARNING) << "Device name prefix mismatch for device: " << device_id << ", device_name: " << device_name;
100 return false;
101 }
102 return true;
103 }
104
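// The NNRT device name prefix selects the build path: "NPU_" devices take the online
// inference path (built from the LiteGraph), "HIAI_F" devices take the offline path
// (a pre-compiled model buffer carried by a single Custom node).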
105 bool NNRTDelegate::IsKirinNPUWithOnlineInference() const {
106 return CheckNPUPrefix("NPU_");
107 }
108
109 bool NNRTDelegate::IsKirinNPUWithOfflineInference() const {
110 return CheckNPUPrefix("HIAI_F");
111 }
112
113 Status NNRTDelegate::BuildKirinNPUModel(DelegateModel<schema::Primitive> *model) {
114 OH_NNModel *nn_model = OH_NNModel_Construct();
115 if (nn_model == nullptr) {
116 MS_LOG(ERROR) << "Create NNModel failed, result is nullptr";
117 return kLiteNullptr;
118 }
119
120 size_t extension_size = nnrt_device_info_.extensions_.size();
121 std::vector<OH_NN_Extension> extensions;
122 MS_LOG_DEBUG << "set extensions, item number: " << extension_size;
123 const size_t kExtensionNameMax = 128; // This is a length limitation in NNRT API.
124 for (size_t i = 0; i < extension_size; i++) {
125 auto &src_extension = nnrt_device_info_.extensions_[i];
126 OH_NN_Extension dst_extension;
127 dst_extension.name[kExtensionNameMax - 1] = '\0';
128 strncpy(dst_extension.name, src_extension.name.c_str(), kExtensionNameMax - 1);
129 dst_extension.value = (char *)((void *)src_extension.value.data());
130 dst_extension.valueSize = src_extension.value.size();
131 extensions.push_back(dst_extension);
132 MS_LOG_DEBUG << "set extension, item name: " << dst_extension.name << ", value size: " << dst_extension.valueSize;
133 }
134
135 auto ret = OH_NNModel_BuildFromLiteGraph(nn_model, lite_graph_, extensions.data(), extensions.size());
136 if (ret != OH_NN_SUCCESS) {
137 MS_LOG(ERROR) << "Build NNModel failed, ret: " << ret;
138 OH_NNModel_Destroy(&nn_model);
139 return kLiteError;
140 }
141
142 auto ret2 = CreateFullModelKernel(model, nn_model);
143 if (ret2 != kSuccess) {
144 MS_LOG(ERROR) << "Create full model kernel failed, ret: " << ret2;
145 return kLiteError;
146 }
147 return kSuccess;
148 }
149
150 namespace {
151 constexpr int32_t kNum2 = 2;
152 }
153
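// Offline path: the model must consist of a single Custom node whose last constant input
// holds the pre-compiled model buffer, which is handed directly to
// OH_NNCompilation_ConstructWithOfflineModelBuffer.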
154 Status NNRTDelegate::BuildOfflineModel(DelegateModel<schema::Primitive> *model) {
155 if (!IsCustomModel()) {
156     MS_LOG(ERROR) << "Not a third-party offline model (expected a single Custom node)";
157 return kLiteNullptr;
158 }
159
160 auto node = lite_graph_->all_nodes_[0];
161 MS_CHECK_TRUE_RET(node != nullptr, kLiteError);
162 auto input_num = node->input_indices_.size();
163   // at least one input and one OM model buffer (as the last constant input)
164 MS_CHECK_TRUE_RET(input_num >= kNum2, kLiteError);
165 MS_CHECK_TRUE_RET(lite_graph_->all_tensors_.size() >= kNum2, kLiteError);
166 auto tensor = lite_graph_->all_tensors_[node->input_indices_[input_num - 1]];
167 MS_CHECK_TRUE_RET(tensor != nullptr, kLiteError);
168 MS_CHECK_TRUE_RET(tensor->data() != nullptr, kLiteError);
169 const uint8_t *model_buf = static_cast<const uint8_t *>(tensor->data()->data());
170 size_t model_size = tensor->data()->size();
171
172 OH_NNCompilation *nn_compilation = OH_NNCompilation_ConstructWithOfflineModelBuffer(model_buf, model_size);
173 if (nn_compilation == nullptr) {
174 MS_LOG(ERROR) << "Construct Offline NNCompilation failed";
175 return kLiteError;
176 }
177 MS_LOG(DEBUG) << "NNRTDelegate creates NNCompilation success.";
178
179 auto ret_code = InitNNCompilation(nn_compilation);
180 if (ret_code != kSuccess) {
181 MS_LOG(ERROR) << "Init NNCompilation failed";
182 OH_NNCompilation_Destroy(&nn_compilation);
183 return kLiteError;
184 }
185 MS_LOG(DEBUG) << "HiAI F InitNNCompilation success";
186
187 OH_NNExecutor *nn_executor = nullptr;
188 nn_executor = OH_NNExecutor_Construct(nn_compilation);
189 if (nn_executor == nullptr) {
190     MS_LOG(ERROR) << "Construct NNExecutor failed";
191 OH_NNCompilation_Destroy(&nn_compilation);
192 return kLiteError;
193 }
194 OH_NNCompilation_Destroy(&nn_compilation);
195
196 auto nnrt_model_kernel = new (std::nothrow) NNRTModelKernel(nn_executor, nnrt_device_info_, model->inputs(), model->outputs());
197 if (nnrt_model_kernel == nullptr) {
198 OH_NNExecutor_Destroy(&nn_executor);
199 MS_LOG(ERROR) << "new NNRTModelKernel failed";
200 return kLiteError;
201 }
202 nn_executor_list_.push_back(nn_executor);
203
204 (void)model->Replace(model->BeginKernelIterator(), model->EndKernelIterator(), nnrt_model_kernel);
205 return kSuccess;
206 }
207
208 Status NNRTDelegate::CreateFullModelKernel(DelegateModel<schema::Primitive> *model, OH_NNModel *nn_model) {
209 OH_NNCompilation *nn_compilation = OH_NNCompilation_Construct(nn_model);
210 if (nn_compilation == nullptr) {
211 MS_LOG(ERROR) << "Construct NNCompilation failed";
212 OH_NNModel_Destroy(&nn_model);
213 return kLiteError;
214 }
215 MS_LOG(DEBUG) << "NNRTDelegate creates NNCompilation success.";
216
217 auto ret_code = InitNNCompilation(nn_compilation);
218 if (ret_code != kSuccess) {
219 MS_LOG(ERROR) << "Init NNCompilation failed";
220 OH_NNModel_Destroy(&nn_model);
221 OH_NNCompilation_Destroy(&nn_compilation);
222 return kLiteError;
223 }
224 OH_NNModel_Destroy(&nn_model);
225
226 OH_NNExecutor *nn_executor = nullptr;
227 nn_executor = OH_NNExecutor_Construct(nn_compilation);
228 if (nn_executor == nullptr) {
229     MS_LOG(ERROR) << "Construct NNExecutor failed";
230 OH_NNCompilation_Destroy(&nn_compilation);
231 return kLiteError;
232 }
233 OH_NNCompilation_Destroy(&nn_compilation);
234
235 auto nnrt_model_kernel = new (std::nothrow) NNRTModelKernel(nn_executor, nnrt_device_info_, model->inputs(), model->outputs());
236 if (nnrt_model_kernel == nullptr) {
237 OH_NNExecutor_Destroy(&nn_executor);
238 MS_LOG(ERROR) << "new NNRTModelKernel failed";
239 return kLiteError;
240 }
241 nn_executor_list_.push_back(nn_executor);
242
243 model->Replace(model->BeginKernelIterator(), model->EndKernelIterator(), nnrt_model_kernel);
244 return kSuccess;
245 }
246 #endif
247
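// Generic path: build a full NNModel to query per-op support, group consecutive supported ops
// into NNRT subgraphs, compile one executor per subgraph, and replace the corresponding kernels
// in the DelegateModel. Any failure falls back to CPU by returning kSuccess without replacement.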
248 Status NNRTDelegate::BuildNormalModel(DelegateModel<schema::Primitive> *model) {
249 MS_LOG(DEBUG) << "Start to build NNRT model.";
250 if ((lite_graph_ == nullptr) || (lite_graph_->sub_graphs_.size() > 1)) {
251     MS_LOG(WARNING) << "LiteGraph is null or contains more than one subgraph. NNRT does not support control-flow models yet, fallback to CPU";
252 return kSuccess;
253 }
254
255 OH_NNModel *full_model = CreateFullNNModel();
256 if (full_model == nullptr) {
257 MS_LOG(WARNING) << "Build full NNModel failed, fallback to CPU";
258 return kSuccess;
259 }
260 std::vector<bool> op_supports = QueryOpSupports(full_model);
261 if (op_supports.empty()) {
262     MS_LOG(WARNING) << "Op support query returned empty for the full model, fallback to CPU";
263 OH_NNModel_Destroy(&full_model);
264 return kSuccess;
265 }
266 auto nnrt_subgraph_ranges = GetNNRTSubgraphRanges(model, op_supports);
267 MS_LOG(INFO) << "Found NNRT subgraph count: " << nnrt_subgraph_ranges.size();
268
269 std::vector<LiteGraph *> sub_lite_graphs;
270 auto ret = CreateLiteGraphForNNRTSubgraph(nnrt_subgraph_ranges, &sub_lite_graphs);
271 if (ret != kSuccess) {
272 OH_NNModel_Destroy(&full_model);
273 MS_LOG(WARNING) << "Create NNRT sub LiteGraph failed, fallback to CPU";
274 return kSuccess;
275 }
276
277 std::vector<NNRTModelKernel *> nnrt_subgraph_kernels;
278 ret = CreateNNRTSubgraphKernels(model, sub_lite_graphs, nnrt_subgraph_ranges, &nnrt_subgraph_kernels);
279 if (ret != kSuccess) {
280 OH_NNModel_Destroy(&full_model);
281 MS_LOG(WARNING) << "Create NNRT subgraph kernel failed, fallback to CPU";
282 return kSuccess;
283 }
284
285 ReplaceNNRTKernelsInDelegateModel(model, nnrt_subgraph_ranges, nnrt_subgraph_kernels);
286 OH_NNModel_Destroy(&full_model);
287 MS_LOG(INFO) << "NNRTDelegate build success.";
288 return kSuccess;
289 }
290
291 OH_NNModel *NNRTDelegate::CreateFullNNModel() {
292 if (lite_graph_ == nullptr) {
293 MS_LOG(ERROR) << "Lite graph is null";
294 return nullptr;
295 }
296
297 if (lite_graph_->sub_graphs_.empty()) {
298     MS_LOG(ERROR) << "Lite graph must have at least one subgraph";
299 return nullptr;
300 }
301
302 OH_NNModel *nn_model = OH_NNModel_Construct();
303 if (nn_model == nullptr) {
304 MS_LOG(ERROR) << "Create NNModel failed, result is nullptr";
305 return nullptr;
306 }
307
308 auto ret = OH_NNModel_BuildFromLiteGraph(nn_model, lite_graph_, nullptr, 0);
309 if (ret != OH_NN_SUCCESS) {
310 MS_LOG(ERROR) << "Build NNModel failed, ret: " << ret;
311 OH_NNModel_Destroy(&nn_model);
312 return nullptr;
313 }
314 return nn_model;
315 }
316
317 std::vector<bool> NNRTDelegate::QueryOpSupports(OH_NNModel *nn_model) {
318   const bool *is_supported = nullptr;  // Note: this memory is owned by nn_model, do not free it separately.
319 uint32_t op_count = 0;
320 auto ret = OH_NNModel_GetAvailableOperations(nn_model, nnrt_device_info_.device_id_, &is_supported, &op_count);
321 if (ret != OH_NN_SUCCESS) {
322 MS_LOG(WARNING) << "NNModel GetAvailableOperations failed, ret: " << ret
323 << ", maybe caused by dataParcel data length limitation";
324 return {};
325 }
326 std::vector<bool> op_supports(is_supported, is_supported + op_count);
327 return op_supports;
328 }
329
330 /* Find continuous sub-sequence in op_supports. */
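/* Each maximal run of supported ops becomes one candidate NNRT subgraph; ranges are half-open [begin, end). */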
331 std::vector<NNRTOpRange> NNRTDelegate::GetNNRTSubgraphRanges(DelegateModel<schema::Primitive> *model,
332 const std::vector<bool> &op_supports) {
333 std::vector<NNRTOpRange> nnrt_subgraph_ranges;
334 NNRTOpRange op_range;
335 bool start_count = false;
336 for (size_t i = 0; i < op_supports.size(); i++) {
337 if (op_supports[i]) {
338 if (start_count == false) {
339 start_count = true;
340 op_range.begin_index_ = i;
341 op_range.begin_iter_ = model->BeginKernelIterator() + i;
342 }
343 } else {
344 if (start_count == true) {
345 start_count = false;
346 op_range.end_index_ = i;
347 op_range.end_iter_ = model->BeginKernelIterator() + i;
348 nnrt_subgraph_ranges.push_back(op_range);
349 }
350 }
351 }
352 // handle last true subsequence
353 if (start_count == true) {
354 op_range.end_index_ = op_supports.size();
355 op_range.end_iter_ = model->EndKernelIterator();
356 nnrt_subgraph_ranges.push_back(op_range);
357 MS_LOG(INFO) << "Schedule NNRT subgraph range: [" << op_range.begin_index_ << ", " << op_range.end_index_ << ")";
358 }
359 return nnrt_subgraph_ranges;
360 }
361
362 /**
363  * This method ONLY works when the following pre-conditions are satisfied:
364  * 1. The node order of lite_graph_->all_nodes_ is consistent with the DelegateModel kernel sequence.
365  * This guarantees that kernel replacement in the DelegateModel, based on the re-organized info from lite_graph_, is correct.
366  * 2. The node indices in lite_graph_->sub_graphs_[0]->node_indices_ increase monotonically from 0 to size - 1.
367 */
368 Status NNRTDelegate::CreateLiteGraphForNNRTSubgraph(
369 const std::vector<NNRTOpRange> &nnrt_op_ranges,
370 std::vector<LiteGraph *> *sub_lite_graphs) {
371 MS_LOG(INFO) << "Start creating LiteGraph for NNRT subgraph";
372 for (const auto &op_range: nnrt_op_ranges) {
373 MS_LOG(INFO) << "Process op range: [" << op_range.begin_index_ << ", " << op_range.end_index_ << ")";
374 LiteGraph *sub_lite_graph = new (std::nothrow)LiteGraph;
375 if (sub_lite_graph == nullptr) {
376 MS_LOG(ERROR) << "Allocate LiteGraph failed";
377 return kLiteError;
378 }
379 sub_lite_graph->name_ = lite_graph_->name_;
380 sub_lite_graph->version_ = lite_graph_->version_;
381
382 auto sub_graph = new (std::nothrow)LiteGraph::SubGraph;
383 if (sub_graph == nullptr) {
384 MS_LOG(ERROR) << "Allocate SubGraph failed";
385 return kLiteError;
386 }
387 sub_graph->name_ = lite_graph_->name_;
388 sub_lite_graph->sub_graphs_.push_back(sub_graph);
389
390 // deal with all_nodes
391 MS_LOG(INFO) << "Assemble all_nodes...";
392 int new_node_index = 0;
393 std::map<uint32_t, schema::Tensor *> in_tensor_index_map;
394 std::map<uint32_t, schema::Tensor *> out_tensor_index_map;
395 for (size_t index = op_range.begin_index_; index < op_range.end_index_; index++) {
396 LiteGraph::Node *node = new (std::nothrow)LiteGraph::Node;
397 if (node == nullptr) {
398 MS_LOG(ERROR) << "Allocate Node failed";
399 return kLiteError;
400 }
401 *node = *lite_graph_->all_nodes_[index];
402 sub_lite_graph->all_nodes_.push_back(node);
403 sub_graph->node_indices_.push_back(new_node_index++);
404
405 for (auto i: node->input_indices_) {
406 in_tensor_index_map.emplace(i, lite_graph_->all_tensors_[i]);
407 }
408 for (auto i: node->output_indices_) {
409 out_tensor_index_map.emplace(i, lite_graph_->all_tensors_[i]);
410 }
411 }
412
413 // deal with all_tensors
414 MS_LOG(INFO) << "Assemble all_tensors...";
415 std::set<schema::Tensor *> tensors;
416 for (auto iter: in_tensor_index_map) {
417 tensors.emplace(iter.second);
418 }
419 for (auto iter: out_tensor_index_map) {
420 tensors.emplace(iter.second);
421 }
422
423 uint32_t new_index = 0;
424 std::map<schema::Tensor *, uint32_t> new_tensor_maps;
425 for (auto tensor: tensors) {
426 new_tensor_maps.emplace(tensor, new_index++);
427 }
428
429 sub_lite_graph->all_tensors_ = std::vector<schema::Tensor *>(tensors.begin(), tensors.end());
430
431 // deal with every node's input/output indices
432 MS_LOG(INFO) << "Set input/output indices of each node...";
433 for (auto node: sub_lite_graph->all_nodes_) {
434 for (auto &index : node->input_indices_) {
435 index = new_tensor_maps.at(in_tensor_index_map.at(index));
436 }
437 for (auto &index : node->output_indices_) {
438 index = new_tensor_maps.at(out_tensor_index_map.at(index));
439 }
440 }
441
442 // deal with subgraph's input/output indices
443 MS_LOG(INFO) << "Set input/output indices of each subgraph...";
444 sub_graph->tensor_indices_ = std::vector<uint32_t>(tensors.size());
445 std::iota(sub_graph->tensor_indices_.begin(), sub_graph->tensor_indices_.end(), 0U);
446
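    // A non-const tensor consumed inside the range is a subgraph input unless some node
    // inside the range also produces it.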
447 for (auto iter: in_tensor_index_map) {
448 auto new_tensor_index = new_tensor_maps[iter.second];
449 MS_LOG(DEBUG) << "handle input: old: " << iter.first << ", new: " << new_tensor_index << std::endl;
450 if (IsConstTensor(*iter.second)) {
451 MS_LOG(DEBUG) << "- tensor: " << new_tensor_index << " is const." << std::endl;
452 continue;
453 }
454
455 bool is_subgraph_input = true;
456 for (auto node: sub_lite_graph->all_nodes_) {
457 if (std::find(node->output_indices_.begin(), node->output_indices_.end(), new_tensor_index) !=
458 node->output_indices_.end()) {
459 is_subgraph_input = false;
460 MS_LOG(DEBUG) << "- tensor: " << new_tensor_index << " is not subgraph input." << std::endl;
461 break;
462 }
463 }
464 if (is_subgraph_input) {
465 sub_graph->input_indices_.push_back(new_tensor_index);
466 MS_LOG(DEBUG) << "- select tensor: " << new_tensor_index << " as subgraph input." << std::endl;
467 }
468 }
469
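    // A tensor produced inside the range is a subgraph output if some node outside the range
    // consumes it, or if it is an output of the whole graph.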
470 for (auto iter: out_tensor_index_map) {
471 int new_tensor_index = new_tensor_maps.at(iter.second);
472 MS_LOG(DEBUG) << "handle output: old: " << iter.first << ", new: " << new_tensor_index << std::endl;
473 if (IsConstTensor(*iter.second)) {
474 MS_LOG(DEBUG) << "- tensor: " << new_tensor_index << " is const." << std::endl;
475 continue;
476 }
477
478 bool is_subgraph_output = false;
479 for (size_t i = 0; i < lite_graph_->all_nodes_.size(); i++) {
480 if ((i >= op_range.begin_index_) && (i < op_range.end_index_)) {
481 continue;
482 }
483 auto node = lite_graph_->all_nodes_[i];
484 if (std::find(node->input_indices_.begin(), node->input_indices_.end(), iter.first) !=
485 node->input_indices_.end()) { // As the input of node which does not belong to the subgraph.
486 is_subgraph_output = true;
487 MS_LOG(DEBUG) << "- tensor: " << new_tensor_index << " is original subgraph output. node: " << node->primitive_ << std::endl;
488 break;
489 }
490 }
491 bool is_graph_output = (std::find(lite_graph_->output_indices_.begin(),lite_graph_->output_indices_.end(),
492 iter.first) != lite_graph_->output_indices_.end());
493 if (is_graph_output) {
494 MS_LOG(DEBUG) << "- tensor: " << new_tensor_index << " is graph output." << std::endl;
495 }
496 if (is_subgraph_output || is_graph_output) {
497 sub_graph->output_indices_.push_back(new_tensor_index);
498 MS_LOG(DEBUG) << "- select tensor: " << new_tensor_index << " as subgraph output." << std::endl;
499 }
500 }
501
502 // deal with full-graph's input/output indices
503 sub_lite_graph->input_indices_ = sub_graph->input_indices_;
504 sub_lite_graph->output_indices_ = sub_graph->output_indices_;
505 sub_lite_graphs->push_back(sub_lite_graph);
506 }
507 MS_LOG(INFO) << "Finished creating LiteGraph for NNRT subgraph";
508 return kSuccess;
509 }
510
511 struct TensorLocation {
512   uint32_t node_index;    // index of the node that the tensor belongs to.
513   uint32_t tensor_index;  // position of the tensor within that node's input/output tensor list.
514 };
515
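// Apply device, performance mode, priority, fp16, optional cache and HiAI-specific options to the
// compilation, then build it. OH_NN_OPERATION_FORBIDDEN from the optional setters is tolerated,
// presumably because not every backend supports those options.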
516 Status NNRTDelegate::InitNNCompilation(OH_NNCompilation *nn_compilation) const {
517 auto ret_code = OH_NNCompilation_SetDevice(nn_compilation, nnrt_device_info_.device_id_);
518 if (ret_code != OH_NN_SUCCESS) {
519 MS_LOG(ERROR) << "NNCompilation set device id failed, ret: " << ret_code;
520 return kLiteError;
521 }
522 ret_code = OH_NNCompilation_SetPerformanceMode(nn_compilation,
523 (OH_NN_PerformanceMode)(nnrt_device_info_.performance_mode_));
524 if ((ret_code != OH_NN_SUCCESS) && (ret_code != OH_NN_OPERATION_FORBIDDEN)) {
525 MS_LOG(ERROR) << "NNCompilation set performance mode failed, ret: " << ret_code;
526 return kLiteError;
527 }
528 ret_code = OH_NNCompilation_SetPriority(nn_compilation, (OH_NN_Priority)(nnrt_device_info_.priority_));
529 if ((ret_code != OH_NN_SUCCESS) && (ret_code != OH_NN_OPERATION_FORBIDDEN)) {
530 MS_LOG(ERROR) << "NNCompilation set priority failed, ret: " << ret_code;
531 return kLiteError;
532 }
533 ret_code = OH_NNCompilation_EnableFloat16(nn_compilation, nnrt_device_info_.enable_fp16_);
534 if ((ret_code != OH_NN_SUCCESS) && (ret_code != OH_NN_OPERATION_FORBIDDEN)) {
535 MS_LOG(ERROR) << "NNCompilation enable fp16 failed, ret: " << ret_code;
536 return kLiteError;
537 }
538
539 if (!extension_options_.cache_path_.empty()) { // Set cache path if user indeed set it.
540 ret_code = OH_NNCompilation_SetCache(nn_compilation, extension_options_.cache_path_.c_str(),
541 extension_options_.cache_version_);
542 if ((ret_code != OH_NN_SUCCESS) && (ret_code != OH_NN_OPERATION_FORBIDDEN)) {
543 MS_LOG(ERROR) << "NNCompilation set cache failed, ret: " << ret_code;
544 return kLiteError;
545 }
546 }
547
548 #ifdef SUPPORT_NNRT_METAGRAPH
549 if (hiai_handle_ != nullptr && IsKirinNPUWithOfflineInference()) {
550 if (extension_options_.band_mode != mindspore::lite::HIAI_BANDMODE_UNSET) {
551 ret_code = mindspore::lite::HMS_HiAIOptions_SetBandMode(nn_compilation, extension_options_.band_mode);
552 if ((ret_code != OH_NN_SUCCESS) && (ret_code != OH_NN_OPERATION_FORBIDDEN)) {
553 MS_LOG(ERROR) << "NNCompilation set BandMode failed, ret: " << ret_code;
554 return kLiteError;
555 }
556 }
557
558 if (extension_options_.is_optional_quant_setted) {
559 if (extension_options_.quant_config == nullptr || extension_options_.quant_config_size <= 0) {
560         MS_LOG(ERROR) << "NNCompilation set QuantConfig failed, input quant config is invalid, please make sure buffer "
561 << "is not null and size > 0.";
562 return kLiteError;
563 }
564 ret_code = mindspore::lite::HMS_HiAIOptions_SetQuantConfig(nn_compilation, extension_options_.quant_config,
565 extension_options_.quant_config_size);
566 if ((ret_code != OH_NN_SUCCESS) && (ret_code != OH_NN_OPERATION_FORBIDDEN)) {
567 MS_LOG(ERROR) << "NNCompilation set QuantConfig failed, ret: " << ret_code;
568 return kLiteError;
569 }
570 }
571 } else {
572     MS_LOG(WARNING) << "hiai_handle_ is nullptr or device is not HiAI offline inference, skip HiAI options.";
573 }
574 #endif
575
576 ret_code = OH_NNCompilation_Build(nn_compilation);
577 if (ret_code != OH_NN_SUCCESS) {
578 MS_LOG(ERROR) << "Build NNCompilation failed, ret: " << ret_code;
579 return kLiteError;
580 }
581 return kSuccess;
582 }
583
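// For each sub LiteGraph: build and compile an NNModel, then map the subgraph's input/output
// tensor indices back to kernels in the original DelegateModel to reuse their MSTensor objects.
// If any in/out tensor is not NHWC, a nullptr placeholder is recorded so that range stays on CPU.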
584 Status NNRTDelegate::CreateNNRTSubgraphKernels(DelegateModel<schema::Primitive> *model,
585 const std::vector<LiteGraph *> &sub_lite_graphs, const std::vector<NNRTOpRange> &nnrt_subgraph_ranges,
586 std::vector<NNRTModelKernel *> *nnrt_subgraph_kernels) {
587 for (size_t i = 0; i < sub_lite_graphs.size(); i++) {
588 auto sub_lite_graph = sub_lite_graphs[i];
589
590 OH_NNModel *nn_model = OH_NNModel_Construct();
591 auto ret = OH_NNModel_BuildFromLiteGraph(nn_model, sub_lite_graph, nullptr, 0);
592 if (ret != OH_NN_SUCCESS) {
593 MS_LOG(ERROR) << "Build NNModel failed, ret: " << ret;
594 OH_NNModel_Destroy(&nn_model);
595 return kLiteError;
596 }
597
598 OH_NNCompilation *nn_compilation = OH_NNCompilation_Construct(nn_model);
599 if (nn_compilation == nullptr) {
600 MS_LOG(ERROR) << "Construct NNCompilation failed";
601 OH_NNModel_Destroy(&nn_model);
602 return kLiteError;
603 }
604 MS_LOG(DEBUG) << "NNRTDelegate creates NNCompilation success.";
605
606 auto ret_code = InitNNCompilation(nn_compilation);
607 if (ret_code != kSuccess) {
608 MS_LOG(ERROR) << "Init NNCompilation failed";
609 OH_NNCompilation_Destroy(&nn_compilation);
610 OH_NNModel_Destroy(&nn_model);
611 return kLiteError;
612 }
613
614 OH_NNExecutor *nn_executor = nullptr;
615 nn_executor = OH_NNExecutor_Construct(nn_compilation);
616 if (nn_executor == nullptr) {
617       MS_LOG(ERROR) << "Construct NNExecutor failed";
618 OH_NNCompilation_Destroy(&nn_compilation);
619 OH_NNModel_Destroy(&nn_model);
620 return kLiteError;
621 }
622 MS_LOG(DEBUG) << "NNRTDelegate creates NNExecutor success.";
623
624 bool format_not_support = false;
625 std::vector<MSTensor> in_tensors;
626 for (auto index: sub_lite_graph->sub_graphs_[0]->input_indices_) {
627 TensorLocation location;
628 for (auto node_index: sub_lite_graph->sub_graphs_[0]->node_indices_) {
629 auto node = sub_lite_graph->all_nodes_[node_index];
630 auto iter = std::find(node->input_indices_.begin(), node->input_indices_.end(), index);
631 if (iter != node->input_indices_.end()) {
632 uint32_t tensor_index = iter - node->input_indices_.begin();
633 location.node_index = node_index;
634 location.tensor_index = tensor_index;
635 MS_LOG(INFO) << "Found graph input index: " << index << " is the " << tensor_index << "th input of the node " << node->primitive_;
636 break;
637 }
638 }
639 KernelIter kernel_iter = nnrt_subgraph_ranges[i].begin_iter_ + location.node_index;
640 in_tensors.push_back((*kernel_iter)->inputs()[location.tensor_index]);
641 if (in_tensors.back().format() != Format::NHWC) {
642 format_not_support = true;
643         break;
644 }
645 }
646
647 std::vector<MSTensor> out_tensors;
648 for (auto index: sub_lite_graph->sub_graphs_[0]->output_indices_) {
649 TensorLocation location;
650 for (auto node_index: sub_lite_graph->sub_graphs_[0]->node_indices_) {
651 auto node = sub_lite_graph->all_nodes_[node_index];
652 auto iter = std::find(node->output_indices_.begin(), node->output_indices_.end(), index);
653 if (iter != node->output_indices_.end()) {
654 uint32_t tensor_index = iter - node->output_indices_.begin();
655 location.node_index = node_index;
656 location.tensor_index = tensor_index;
657 MS_LOG(INFO) << "Found graph output index: " << index << " is the " << tensor_index << "th output of the node " << node->primitive_;
658 break;
659 }
660 }
661 KernelIter kernel_iter = nnrt_subgraph_ranges[i].begin_iter_ + location.node_index;
662 out_tensors.push_back((*kernel_iter)->outputs()[location.tensor_index]);
663 if (out_tensors.back().format() != Format::NHWC) {
664 format_not_support = true;
665         break;
666 }
667 }
668 if (format_not_support) {
669 MS_LOG(WARNING) << "Not support in/out tensor format, skip this subgraph";
670 OH_NNCompilation_Destroy(&nn_compilation);
671 OH_NNModel_Destroy(&nn_model);
672 nnrt_subgraph_kernels->push_back(nullptr);
673       continue;
674 }
675
676 auto nnrt_model_kernel = new (std::nothrow) NNRTModelKernel(nn_executor, nnrt_device_info_, in_tensors, out_tensors);
677 if (nnrt_model_kernel == nullptr) {
678 MS_LOG(ERROR) << "new NNRTModelKernel failed";
679 return kLiteError;
680 }
681 nn_executor_list_.push_back(nn_executor);
682 OH_NNCompilation_Destroy(&nn_compilation);
683 OH_NNModel_Destroy(&nn_model);
684 nnrt_subgraph_kernels->push_back(nnrt_model_kernel);
685 }
686 return kSuccess;
687 }
688
689 void NNRTDelegate::ReplaceNNRTKernelsInDelegateModel(DelegateModel<schema::Primitive> *model,
690 const std::vector<NNRTOpRange> &nnrt_subgraph_ranges,
691 const std::vector<NNRTModelKernel *> &nnrt_subgraph_kernels) {
692   // The replacement is performed from back to front intentionally! Replacing from front to back would shrink
693   // the kernel sequence, so later begin_iter_/end_iter_ iterators might already be invalidated.
694 for (int i = nnrt_subgraph_ranges.size() - 1; i >= 0; i--) {
695 if (nnrt_subgraph_kernels[i] == nullptr) {
696 continue;
697 }
698 auto from = nnrt_subgraph_ranges[i].begin_iter_;
699 auto end = nnrt_subgraph_ranges[i].end_iter_;
700 (void)model->Replace(from, end, nnrt_subgraph_kernels[i]);
701 MS_LOG(INFO) << "Replace nnrt subgraph kernel in range: [" << (from - model->BeginKernelIterator())
702 << ", " << (end - model->BeginKernelIterator()) << ")";
703 }
704 }
705
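// Describe every model input as an OH_NN_Tensor (data type, dimensions, optional quantization
// parameters) and bind its data buffer to the executor via OH_NNExecutor_SetInput.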
706 Status NNRTDelegate::PrepareInputs(DelegateModel<schema::Primitive> *model,
707 OH_NNExecutor *oh_nn_executor) {
708 auto input_tensors = model->inputs();
709 for (size_t i = 0; i < input_tensors.size(); i++) {
710 auto tensor = input_tensors[i];
711 auto tensor_shape = tensor.Shape();
712 auto tmp_quant_param = tensor.QuantParams();
713 OH_NN_QuantParam *quant_param = nullptr;
714 std::vector<uint32_t> bit_num;
715 std::vector<double> scale;
716 std::vector<int32_t> zero_point;
717 if (!tmp_quant_param.empty()) {
718 quant_param = new(std::nothrow) OH_NN_QuantParam;
719 if (quant_param == nullptr) {
720 MS_LOG(ERROR) << "new OH_NN_QuantParam failed.";
721 return kLiteError;
722 }
723 for (auto qparam : tmp_quant_param) {
724 bit_num.emplace_back(qparam.bit_num);
725 scale.emplace_back(qparam.scale);
726 zero_point.emplace_back(qparam.zero_point);
727 }
728 quant_param->quantCount = tmp_quant_param.size();
729 quant_param->numBits = bit_num.data();
730 quant_param->scale = scale.data();
731 quant_param->zeroPoint = zero_point.data();
732 }
733 auto oprend = new(std::nothrow) OH_NN_Tensor;
734 if (oprend == nullptr) {
735 MS_LOG(ERROR) << "new OH_NN_Tensor Failed";
736 return kLiteError;
737 }
738 oprend->dataType = CastToNNRTDataType(tensor.DataType());
739 oprend->dimensionCount = tensor_shape.size();
740
741 std::vector<int32_t> dimensions_list;
742 for (auto shape : tensor_shape) {
743 if (shape < INT32_MAX) {
744 dimensions_list.emplace_back(static_cast<int32_t>(shape));
745 } else {
746         MS_LOG(ERROR) << "NNExecutor SetInput failed, tensor dimension is too large, max dim = " << INT32_MAX
747                       << ", but got dimension = " << shape;
748 return kLiteError;
749 }
750 }
751 oprend->dimensions = dimensions_list.data();
752 oprend->quantParam = quant_param;
753 oprend->type = OH_NN_TENSOR;
754 OH_NN_ReturnCode ret_code =
755 OH_NNExecutor_SetInput(oh_nn_executor, i, oprend, tensor.MutableData(), tensor.DataSize());
756 delete (oprend);
757
758 if (!tmp_quant_param.empty()) {
759 delete (quant_param);
760 quant_param = nullptr;
761 }
762
763 if (ret_code != OH_NN_SUCCESS) {
764       MS_LOG(ERROR) << "NNExecutor SetInput failed, current input tensor is " << tensor.Name()
765                     << ", OH_NN_ReturnCode = " << ret_code;
766 return kLiteError;
767 }
768 }
769 return kSuccess;
770 }
771
772 OH_NN_DataType NNRTDelegate::CastToNNRTDataType(DataType data_type) {
773 const std::unordered_map<DataType, OH_NN_DataType> kDataTypeMap = {
774 {DataType::kNumberTypeBool, OH_NN_BOOL},
775 {DataType::kNumberTypeInt8, OH_NN_INT8},
776 {DataType::kNumberTypeInt16, OH_NN_INT16},
777 {DataType::kNumberTypeInt32, OH_NN_INT32},
778 {DataType::kNumberTypeInt64, OH_NN_INT64},
779 {DataType::kNumberTypeUInt8, OH_NN_UINT8},
780 {DataType::kNumberTypeUInt16, OH_NN_UINT16},
781 {DataType::kNumberTypeUInt32, OH_NN_UINT32},
782 {DataType::kNumberTypeUInt64, OH_NN_UINT64},
783 {DataType::kNumberTypeFloat16, OH_NN_FLOAT16},
784 {DataType::kNumberTypeFloat32, OH_NN_FLOAT32},
785 {DataType::kNumberTypeFloat64, OH_NN_FLOAT64},
786 };
787
788 auto iter = kDataTypeMap.find(data_type);
789 if (iter == kDataTypeMap.end()) {
790 return OH_NN_UNKNOWN;
791 }
792 return iter->second;
793 }
794
795 Status NNRTDelegate::PrepareOutputs(DelegateModel<schema::Primitive> *model,
796 OH_NNExecutor *oh_nn_executor) {
797 auto output_tensors = model->outputs();
798 for (size_t i = 0; i < output_tensors.size(); i++) {
799 auto tensor = output_tensors[i];
800 OH_NN_ReturnCode ret_code = OH_NNExecutor_SetOutput(oh_nn_executor, i, tensor.MutableData(), tensor.DataSize());
801 if (ret_code != OH_NN_SUCCESS) {
802       MS_LOG(ERROR) << "NNExecutor SetOutput failed, current output tensor is " << tensor.Name()
803 << ", OH_NN_ReturnCode = " << ret_code;
804 return kLiteError;
805 }
806 }
807 return kSuccess;
808 }
809
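// Serialize a lite Tensor into a new flatbuffer-backed schema::Tensor, copying shape, data and
// quantization parameters while reusing metadata (node type, format, name, external data) from
// the original schema tensor. The malloc'ed buffer is tracked in dequant_schema_tensors_buffer_map_
// so it can be released in the destructor.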
810 schema::Tensor *NNRTDelegate::TensorToSchemaTensor(Tensor *lite_tensor, schema::Tensor *schema_tensor) {
811 flatbuffers::FlatBufferBuilder fbb(1024);
812 auto shape = lite_tensor->shape();
813 std::vector<int32_t> dim_vec(shape.begin(), shape.end());
814
815 auto quant_params = lite_tensor->quant_params();
816 std::vector<flatbuffers::Offset<mindspore::schema::QuantParam>> quant_vec;
817 quant_vec.reserve(quant_params.size());
818 for (auto q_param : quant_params) {
819 quant_vec.emplace_back(schema::CreateQuantParam(fbb, q_param.scale, q_param.zeroPoint, 0, 0, true, q_param.bitNum));
820 }
821 auto quant_clusters = lite_tensor->quant_clusters();
822
823 auto external_data = schema_tensor->externalData();
824 std::vector<flatbuffers::Offset<mindspore::schema::ExternalData>> external_data_vec;
825 if (external_data != nullptr) {
826 for (auto ed : *external_data) {
827 external_data_vec.emplace_back(schema::CreateExternalDataDirect(fbb, ed->checkSum()->c_str(), ed->location()->c_str(), 0, ed->length()));
828 }
829 }
830 uint8_t *data_src = reinterpret_cast<uint8_t *>(lite_tensor->data());
831 std::vector<uint8_t> data_vec(data_src, data_src + lite_tensor->Size());
832 auto tensor_offset = schema::CreateTensorDirect(fbb, schema_tensor->nodeType(), lite_tensor->data_type(), &dim_vec,
833 schema_tensor->format(), 0, 0, &data_vec, &quant_vec,
834 &quant_clusters, schema_tensor->name()->c_str(),
835 schema_tensor->enableHuffmanCode(),
836 mindspore::schema::WeightQuantCompressType_NONE, &external_data_vec);
837 fbb.Finish(tensor_offset);
838
839 auto buf = fbb.GetBufferPointer();
840 if (buf == nullptr) {
841 MS_LOG(ERROR) << "GetBufferPointer return nullptr";
842 fbb.Clear();
843 return nullptr;
844 }
845 size_t byte_num = fbb.GetSize();
846 auto tensor_buf = reinterpret_cast<char *>(malloc(byte_num));
847 if (tensor_buf == nullptr) {
848 MS_LOG(ERROR) << "malloc primitive_buf_ failed";
849 fbb.Clear();
850 return nullptr;
851 }
852 memcpy(tensor_buf, buf, fbb.GetSize());
853 auto tensor = flatbuffers::GetRoot<schema::Tensor>(tensor_buf);
854 fbb.Clear();
855 if (tensor != nullptr) {
856     // Record the owning buffer so tensor_buf can be freed in the destructor.
857 auto iter = dequant_schema_tensors_buffer_map_.find(const_cast<schema::Tensor *>(tensor));
858 if (iter != dequant_schema_tensors_buffer_map_.end()) {
859 MS_LOG(ERROR) << "schema tensor is duplicated.";
860 return nullptr;
861 }
862 dequant_schema_tensors_buffer_map_[const_cast<schema::Tensor *>(tensor)] = tensor_buf;
863 }
864 return const_cast<schema::Tensor *>(tensor);
865 }
866
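// Replace the schema tensors of a weight-quantized node's constant int8/int16/int32 inputs with
// ones rebuilt from the corresponding tensors in dequant_src_tensors_ (expected to hold the
// dequantized data).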
867 int NNRTDelegate::DequantNodeInputs(LiteGraph::Node *node) {
868 auto in_size = node->input_indices_.size();
869 int ret = RET_OK;
870 for (size_t i = 0; i < in_size; i++) {
871 auto tensor_index = node->input_indices_[i];
872 auto *src_tensor = lite_graph_->all_tensors_[tensor_index];
873 auto input = dequant_src_tensors_->at(tensor_index);
874 if (!input->IsConst() || !(src_tensor->dataType() == kNumberTypeInt8 ||
875 src_tensor->dataType() == kNumberTypeInt16 || src_tensor->dataType() == kNumberTypeInt32)) {
876 continue;
877 }
878 auto dst_tensor = TensorToSchemaTensor(input, src_tensor);
879 if (dst_tensor != nullptr) {
880 dequant_schema_tensors_.emplace(tensor_index, dst_tensor);
881 replaced_schema_tensors_.emplace_back(src_tensor);
882 } else {
883 MS_LOG(ERROR) << "create dequant schema tensor failed, node: " << node->name_ << ", tensor_index: "
884 << tensor_index;
885 ret = RET_ERROR;
886 break;
887 }
888 }
889 return ret;
890 }
891
892 Status NNRTDelegate::DequantLiteGraph(LiteGraph *lite_graph) {
893 for (auto node_index : lite_graph->sub_graphs_[0]->node_indices_) {
894 auto node = lite_graph->all_nodes_[node_index];
895
896 if (node->quant_type_ != static_cast<int>(schema::QuantType_QUANT_WEIGHT)) {
897 continue;
898 }
899 auto ret = DequantNodeInputs(node);
900 if (ret != RET_OK) {
901 MS_LOG(ERROR) << "Dequant node failed: " << ret << ", node_name: " << node->name_;
902 for (auto iter : dequant_schema_tensors_) {
903 delete iter.second;
904 iter.second = nullptr;
905 }
906 return kLiteNotSupport;
907 }
908 node->quant_type_ = schema::QuantType_QUANT_NONE;
909 }
910 for (auto iter : dequant_schema_tensors_) {
911 lite_graph_->all_tensors_[iter.first] = iter.second;
912 }
913 return kSuccess;
914 }
915
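// "Shallow" copy: nodes and subgraphs are duplicated (primitives still shared with the source
// graph), while the tensor list is shared as-is after a CheckTensorSupported validation pass.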
916 void NNRTDelegate::ShallowCopyLiteGraph(const lite::LiteGraph &lite_graph) {
917 std::vector<LiteGraph::Node *> node_list;
918 node_list.reserve(lite_graph.all_nodes_.size());
919 // copy node
920 for (auto node : lite_graph.all_nodes_) {
921 auto new_node = new(std::nothrow) LiteGraph::Node;
922 if (new_node == nullptr) {
923       MS_LOG(ERROR) << "new LiteGraph::Node failed.";
924 return;
925 }
926 new_node->name_ = node->name_;
927 new_node->op_type_ = node->op_type_;
928 new_node->node_type_ = node->node_type_;
929 new_node->primitive_ = node->primitive_;
930 new_node->base_operator_ = node->base_operator_;
931 new_node->input_indices_ = node->input_indices_;
932 new_node->output_indices_ = node->output_indices_;
933 new_node->quant_type_ = node->quant_type_;
934 new_node->device_type_ = node->device_type_;
935 node_list.emplace_back(new_node);
936 }
937 // copy subgraph
938 std::vector<LiteGraph::SubGraph *> subgraph_list;
939 for (auto subgraph : lite_graph.sub_graphs_) {
940 auto new_subgraph = new(std::nothrow) LiteGraph::SubGraph;
941 if (new_subgraph == nullptr) {
942 MS_LOG(ERROR) << "new LiteGraph::Subgraph failed.";
943 return;
944 }
945 new_subgraph->name_ = subgraph->name_;
946 new_subgraph->input_indices_ = subgraph->input_indices_;
947 new_subgraph->output_indices_ = subgraph->output_indices_;
948 new_subgraph->node_indices_ = subgraph->node_indices_;
949 subgraph_list.emplace_back(new_subgraph);
950 }
951 for (auto tensor : lite_graph.all_tensors_) {
952 Status ret = lite::CheckTensorSupported(static_cast<const schema::Tensor *>(tensor));
953 if (ret == kLiteError) {
954 MS_LOG(ERROR) << "tensor supported check failed.";
955 return;
956 }
957 }
958
959 lite_graph_ = new(std::nothrow) lite::LiteGraph();
960 if (lite_graph_ == nullptr) {
961 MS_LOG(ERROR) << "new LiteGraph failed.";
962 return;
963 }
964
965 lite_graph_->name_ = lite_graph.name_;
966 lite_graph_->version_ = lite_graph.version_;
967 lite_graph_->input_indices_ = lite_graph.input_indices_;
968 lite_graph_->output_indices_ = lite_graph.output_indices_;
969 lite_graph_->all_tensors_ = lite_graph.all_tensors_;
970 lite_graph_->all_nodes_ = node_list;
971 lite_graph_->sub_graphs_ = subgraph_list;
972 MS_LOG(INFO) << "ShallowCopyLiteGraph success.";
973 }
974
975 void NNRTDelegate::FreeLiteGraph(lite::LiteGraph **liteGraph) {
976 if (liteGraph != nullptr && *liteGraph != nullptr) {
977 MS_LOG(INFO) << "start to free LiteGraph.";
978 auto graph = *liteGraph;
979 graph->name_.clear();
980 graph->input_indices_.clear();
981 graph->output_indices_.clear();
982 MS_LOG(INFO) << "Destroying nodes.";
983 // node
984 for (size_t idx = 0; idx < graph->all_nodes_.size(); idx++) {
985 if (graph->all_nodes_[idx] != nullptr) {
986 delete graph->all_nodes_[idx];
987 graph->all_nodes_[idx] = nullptr;
988 }
989 }
990 MS_LOG(INFO) << "Destroying subgraphs.";
991 // subgraph
992 for (size_t idx = 0; idx < graph->sub_graphs_.size(); idx++) {
993 if (graph->sub_graphs_[idx] != nullptr) {
994 delete graph->sub_graphs_[idx];
995 graph->sub_graphs_[idx] = nullptr;
996 }
997 }
998 // graph
999 delete graph;
1000 *liteGraph = nullptr;
1001 } else {
1002 MS_LOG(WARNING) << "nnrt_lite_graph is nullptr, no need to free.";
1003 }
1004 }
1005
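// Release every NNExecutor created for NNRT subgraphs and free the buffers backing the
// dequantized schema tensors.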
1006 NNRTDelegate::~NNRTDelegate() {
1007 for (size_t i = 0; i < nn_executor_list_.size(); i++) {
1008 if (nn_executor_list_[i] != nullptr) {
1009 MS_LOG(INFO) << "start NNExecutor Destroy.";
1010 OH_NNExecutor_Destroy(&(nn_executor_list_[i]));
1011 MS_LOG(INFO) << "Destroy NNExecutor Finish.";
1012 }
1013 }
1014 if (lite_graph_ != nullptr) {
1015 MS_LOG(ERROR) << "Delete NNRTDelegate.";
1016 }
1017 for (auto iter : dequant_schema_tensors_buffer_map_) {
1018 if (iter.second != nullptr) {
1019 free(iter.second);
1020 iter.second = nullptr;
1021 }
1022 }
1023 dequant_schema_tensors_buffer_map_.clear();
1024 }
1025 } // namespace lite
1026 } // namespace mindspore
1027