/**
 * Copyright 2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <unordered_set>
#include <unordered_map>
#include <numeric>
#include <algorithm>
#include <map>
#include <set>
#include <cstring>
#include <cstdlib>
#include "nnrt_delegate.h"
#include "checker/primitive_check.h"
#include "src/common/log_adapter.h"
#include "neural_network_runtime/neural_network_runtime.h"
#include "neural_network_runtime_inner.h"
#include "nnrt_model_kernel.h"
#include "schema/model_generated.h"
#include "schema/ops_generated.h"
#include "flatbuffers/flatbuffers.h"
#include "litert/tensor_category.h"

namespace mindspore {
namespace lite {
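// Initializes the delegate. When SUPPORT_NNRT_METAGRAPH is enabled, this tries to load the
// HiAI Foundation dynamic library; a load failure is only logged, since the delegate can
// still serve the other build paths without it.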
Status NNRTDelegate::Init() {
#ifdef SUPPORT_NNRT_METAGRAPH
  auto ret = mindspore::lite::LoadHiaiFLibraryFromPath(&hiai_handle_);
  if (!ret || hiai_handle_ == nullptr) {
    MS_LOG(WARNING) << "Load HiAI_Foundation so failed.";
  }
#endif
  return kSuccess;
}

void NNRTDelegate::InitExtensionOptions() {
  const auto &extensions = nnrt_device_info_.extensions_;
  mindspore::lite::nnrt::ExtensionOptionsParser::Parse(extensions, &extension_options_);
}

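// Entry point of delegate compilation: dequantize the LiteGraph first, then dispatch to one
// of three build paths: Kirin NPU online inference, Kirin NPU offline inference (pre-compiled
// OM model carried inside a Custom node), or the generic NNRT path with CPU fallback.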
Status NNRTDelegate::Build(DelegateModel<schema::Primitive> *model) {
  // dequant litegraph
  auto ret_dequant = DequantLiteGraph(lite_graph_);
  if (ret_dequant != kSuccess) {
    MS_LOG(ERROR) << "Dequant litegraph failed.";
    return kLiteError;
  }
#ifdef SUPPORT_NNRT_METAGRAPH
  InitExtensionOptions();
  if (IsKirinNPUWithOnlineInference()) {
    MS_LOG(DEBUG) << "Choose to build online inference model";
    return BuildKirinNPUModel(model);
  }
  if (IsKirinNPUWithOfflineInference()) {
    MS_LOG(DEBUG) << "Choose to build offline inference model";
    return BuildOfflineModel(model);
  }
#endif

  return BuildNormalModel(model);
}

bool NNRTDelegate::IsCustomModel() const {
  // Check if there is only one Custom kernel in the LiteModel.
  if (lite_graph_ == nullptr) {
    return false;
  }
  if (lite_graph_->all_nodes_.size() != 1) {
    return false;
  }
  auto node = lite_graph_->all_nodes_[0];
  if (node == nullptr) {
    return false;
  }
  if (node->node_type_ != mindspore::schema::PrimitiveType_Custom) {
    return false;
  }
  return true;
}

#ifdef SUPPORT_NNRT_METAGRAPH
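// Checks whether the name of the configured NNRT device starts with the given prefix.
// By naming convention, Kirin NPU devices report "NPU_" for online inference and
// "HIAI_F" for offline (HiAI Foundation) inference.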
bool NNRTDelegate::CheckNPUPrefix(const std::string prefix_name) const {
  const std::string kirin_npu_name_prefix = prefix_name;
  auto device_id = nnrt_device_info_.device_id_;
  const char *device_name;
  auto ret = OH_NNDevice_GetName(device_id, &device_name);
  if (ret != OH_NN_SUCCESS) {
    MS_LOG(WARNING) << "Get name of device: " << device_id << " failed, error: " << ret;
    return false;
  }

  if (strncmp(kirin_npu_name_prefix.c_str(), device_name, kirin_npu_name_prefix.size()) != 0) {
    MS_LOG(WARNING) << "strncmp: " << device_id << " failed, device_name: " << device_name;
    return false;
  }
  return true;
}

bool NNRTDelegate::IsKirinNPUWithOnlineInference() const {
  return CheckNPUPrefix("NPU_");
}

bool NNRTDelegate::IsKirinNPUWithOfflineInference() const {
  return CheckNPUPrefix("HIAI_F");
}

Status NNRTDelegate::BuildKirinNPUModel(DelegateModel<schema::Primitive> *model) {
  OH_NNModel *nn_model = OH_NNModel_Construct();
  if (nn_model == nullptr) {
    MS_LOG(ERROR) << "Create NNModel failed, result is nullptr";
    return kLiteNullptr;
  }

  size_t extension_size = nnrt_device_info_.extensions_.size();
  std::vector<OH_NN_Extension> extensions;
  MS_LOG_DEBUG << "set extensions, item number: " << extension_size;
  const size_t kExtensionNameMax = 128;  // This is a length limitation in NNRT API.
  for (size_t i = 0; i < extension_size; i++) {
    auto &src_extension = nnrt_device_info_.extensions_[i];
    OH_NN_Extension dst_extension;
    dst_extension.name[kExtensionNameMax - 1] = '\0';
    strncpy(dst_extension.name, src_extension.name.c_str(), kExtensionNameMax - 1);
    dst_extension.value = (char *)((void *)src_extension.value.data());
    dst_extension.valueSize = src_extension.value.size();
    extensions.push_back(dst_extension);
    MS_LOG_DEBUG << "set extension, item name: " << dst_extension.name << ", value size: " << dst_extension.valueSize;
  }

  auto ret = OH_NNModel_BuildFromLiteGraph(nn_model, lite_graph_, extensions.data(), extensions.size());
  if (ret != OH_NN_SUCCESS) {
    MS_LOG(ERROR) << "Build NNModel failed, ret: " << ret;
    OH_NNModel_Destroy(&nn_model);
    return kLiteError;
  }

  auto ret2 = CreateFullModelKernel(model, nn_model);
  if (ret2 != kSuccess) {
    MS_LOG(ERROR) << "Create full model kernel failed, ret: " << ret2;
    return kLiteError;
  }
  return kSuccess;
}

namespace {
constexpr int32_t kNum2 = 2;
}

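// Builds a kernel from a pre-compiled offline (OM) model. By convention, the offline model
// buffer is carried as the last constant input tensor of the single Custom node.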
Status NNRTDelegate::BuildOfflineModel(DelegateModel<schema::Primitive> *model) {
  if (!IsCustomModel()) {
    MS_LOG(ERROR) << "Not a third-party offline model";
    return kLiteNullptr;
  }

  auto node = lite_graph_->all_nodes_[0];
  MS_CHECK_TRUE_RET(node != nullptr, kLiteError);
  auto input_num = node->input_indices_.size();
  // at least one input and one OM model buffer (as the last constant input)
  MS_CHECK_TRUE_RET(input_num >= kNum2, kLiteError);
  MS_CHECK_TRUE_RET(lite_graph_->all_tensors_.size() >= kNum2, kLiteError);
  auto tensor = lite_graph_->all_tensors_[node->input_indices_[input_num - 1]];
  MS_CHECK_TRUE_RET(tensor != nullptr, kLiteError);
  MS_CHECK_TRUE_RET(tensor->data() != nullptr, kLiteError);
  const uint8_t *model_buf = static_cast<const uint8_t *>(tensor->data()->data());
  size_t model_size = tensor->data()->size();

  OH_NNCompilation *nn_compilation = OH_NNCompilation_ConstructWithOfflineModelBuffer(model_buf, model_size);
  if (nn_compilation == nullptr) {
    MS_LOG(ERROR) << "Construct Offline NNCompilation failed";
    return kLiteError;
  }
  MS_LOG(DEBUG) << "NNRTDelegate creates NNCompilation success.";

  auto ret_code = InitNNCompilation(nn_compilation);
  if (ret_code != kSuccess) {
    MS_LOG(ERROR) << "Init NNCompilation failed";
    OH_NNCompilation_Destroy(&nn_compilation);
    return kLiteError;
  }
  MS_LOG(DEBUG) << "HiAI F InitNNCompilation success";

  OH_NNExecutor *nn_executor = OH_NNExecutor_Construct(nn_compilation);
  if (nn_executor == nullptr) {
    MS_LOG(ERROR) << "Construct NNExecutor failed";
    OH_NNCompilation_Destroy(&nn_compilation);
    return kLiteError;
  }
  OH_NNCompilation_Destroy(&nn_compilation);

  auto nnrt_model_kernel =
      new (std::nothrow) NNRTModelKernel(nn_executor, nnrt_device_info_, model->inputs(), model->outputs());
  if (nnrt_model_kernel == nullptr) {
    OH_NNExecutor_Destroy(&nn_executor);
    MS_LOG(ERROR) << "new NNRTModelKernel failed";
    return kLiteError;
  }
  nn_executor_list_.push_back(nn_executor);

  (void)model->Replace(model->BeginKernelIterator(), model->EndKernelIterator(), nnrt_model_kernel);
  return kSuccess;
}

Status NNRTDelegate::CreateFullModelKernel(DelegateModel<schema::Primitive> *model, OH_NNModel *nn_model) {
  OH_NNCompilation *nn_compilation = OH_NNCompilation_Construct(nn_model);
  if (nn_compilation == nullptr) {
    MS_LOG(ERROR) << "Construct NNCompilation failed";
    OH_NNModel_Destroy(&nn_model);
    return kLiteError;
  }
  MS_LOG(DEBUG) << "NNRTDelegate creates NNCompilation success.";

  auto ret_code = InitNNCompilation(nn_compilation);
  if (ret_code != kSuccess) {
    MS_LOG(ERROR) << "Init NNCompilation failed";
    OH_NNModel_Destroy(&nn_model);
    OH_NNCompilation_Destroy(&nn_compilation);
    return kLiteError;
  }
  OH_NNModel_Destroy(&nn_model);

  OH_NNExecutor *nn_executor = OH_NNExecutor_Construct(nn_compilation);
  if (nn_executor == nullptr) {
    MS_LOG(ERROR) << "Construct NNExecutor failed";
    OH_NNCompilation_Destroy(&nn_compilation);
    return kLiteError;
  }
  OH_NNCompilation_Destroy(&nn_compilation);

  auto nnrt_model_kernel =
      new (std::nothrow) NNRTModelKernel(nn_executor, nnrt_device_info_, model->inputs(), model->outputs());
  if (nnrt_model_kernel == nullptr) {
    OH_NNExecutor_Destroy(&nn_executor);
    MS_LOG(ERROR) << "new NNRTModelKernel failed";
    return kLiteError;
  }
  nn_executor_list_.push_back(nn_executor);

  (void)model->Replace(model->BeginKernelIterator(), model->EndKernelIterator(), nnrt_model_kernel);
  return kSuccess;
}
#endif

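// Generic build path: query per-op support on the NNRT device, partition the kernel sequence
// into maximal supported ranges, and replace each range with one NNRT subgraph kernel.
// Every failure in this path falls back to CPU by returning kSuccess without replacement.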
Status NNRTDelegate::BuildNormalModel(DelegateModel<schema::Primitive> *model) {
  MS_LOG(DEBUG) << "Start to build NNRT model.";
  if ((lite_graph_ == nullptr) || (lite_graph_->sub_graphs_.size() > 1)) {
    MS_LOG(WARNING) << "LiteGraph is null or contains more than one subgraph. NNRT does not support "
                    << "control-flow models yet, fallback to CPU";
    return kSuccess;
  }

  OH_NNModel *full_model = CreateFullNNModel();
  if (full_model == nullptr) {
    MS_LOG(WARNING) << "Build full NNModel failed, fallback to CPU";
    return kSuccess;
  }
  std::vector<bool> op_supports = QueryOpSupports(full_model);
  if (op_supports.empty()) {
    MS_LOG(WARNING) << "Query no op supports for full model, fallback to CPU";
    OH_NNModel_Destroy(&full_model);
    return kSuccess;
  }
  auto nnrt_subgraph_ranges = GetNNRTSubgraphRanges(model, op_supports);
  MS_LOG(INFO) << "Found NNRT subgraph count: " << nnrt_subgraph_ranges.size();

  std::vector<LiteGraph *> sub_lite_graphs;
  auto ret = CreateLiteGraphForNNRTSubgraph(nnrt_subgraph_ranges, &sub_lite_graphs);
  if (ret != kSuccess) {
    OH_NNModel_Destroy(&full_model);
    MS_LOG(WARNING) << "Create NNRT sub LiteGraph failed, fallback to CPU";
    return kSuccess;
  }

  std::vector<NNRTModelKernel *> nnrt_subgraph_kernels;
  ret = CreateNNRTSubgraphKernels(model, sub_lite_graphs, nnrt_subgraph_ranges, &nnrt_subgraph_kernels);
  if (ret != kSuccess) {
    OH_NNModel_Destroy(&full_model);
    MS_LOG(WARNING) << "Create NNRT subgraph kernel failed, fallback to CPU";
    return kSuccess;
  }

  ReplaceNNRTKernelsInDelegateModel(model, nnrt_subgraph_ranges, nnrt_subgraph_kernels);
  OH_NNModel_Destroy(&full_model);
  MS_LOG(INFO) << "NNRTDelegate build success.";
  return kSuccess;
}

OH_NNModel *NNRTDelegate::CreateFullNNModel() {
  if (lite_graph_ == nullptr) {
    MS_LOG(ERROR) << "Lite graph is null";
    return nullptr;
  }

  if (lite_graph_->sub_graphs_.empty()) {
    MS_LOG(ERROR) << "Lite graph must have at least one subgraph";
    return nullptr;
  }

  OH_NNModel *nn_model = OH_NNModel_Construct();
  if (nn_model == nullptr) {
    MS_LOG(ERROR) << "Create NNModel failed, result is nullptr";
    return nullptr;
  }

  auto ret = OH_NNModel_BuildFromLiteGraph(nn_model, lite_graph_, nullptr, 0);
  if (ret != OH_NN_SUCCESS) {
    MS_LOG(ERROR) << "Build NNModel failed, ret: " << ret;
    OH_NNModel_Destroy(&nn_model);
    return nullptr;
  }
  return nn_model;
}

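// Asks the NNRT device which operations of the full model it supports.
// Returns an empty vector on failure.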
std::vector<bool> NNRTDelegate::QueryOpSupports(OH_NNModel *nn_model) {
  const bool *is_supported = nullptr;  // Note: this memory is owned by nn_model, don't free it separately.
  uint32_t op_count = 0;
  auto ret = OH_NNModel_GetAvailableOperations(nn_model, nnrt_device_info_.device_id_, &is_supported, &op_count);
  if (ret != OH_NN_SUCCESS) {
    MS_LOG(WARNING) << "NNModel GetAvailableOperations failed, ret: " << ret
                    << ", maybe caused by dataParcel data length limitation";
    return {};
  }
  std::vector<bool> op_supports(is_supported, is_supported + op_count);
  return op_supports;
}

/* Find maximal continuous sub-sequences of supported ops in op_supports.
 * For example, op_supports = {true, true, false, true} yields the ranges [0, 2) and [3, 4). */
std::vector<NNRTOpRange> NNRTDelegate::GetNNRTSubgraphRanges(DelegateModel<schema::Primitive> *model,
                                                             const std::vector<bool> &op_supports) {
  std::vector<NNRTOpRange> nnrt_subgraph_ranges;
  NNRTOpRange op_range;
  bool start_count = false;
  for (size_t i = 0; i < op_supports.size(); i++) {
    if (op_supports[i]) {
      if (start_count == false) {
        start_count = true;
        op_range.begin_index_ = i;
        op_range.begin_iter_ = model->BeginKernelIterator() + i;
      }
    } else {
      if (start_count == true) {
        start_count = false;
        op_range.end_index_ = i;
        op_range.end_iter_ = model->BeginKernelIterator() + i;
        nnrt_subgraph_ranges.push_back(op_range);
      }
    }
  }
  // handle the last run of supported ops
  if (start_count == true) {
    op_range.end_index_ = op_supports.size();
    op_range.end_iter_ = model->EndKernelIterator();
    nnrt_subgraph_ranges.push_back(op_range);
    MS_LOG(INFO) << "Schedule NNRT subgraph range: [" << op_range.begin_index_ << ", " << op_range.end_index_ << ")";
  }
  return nnrt_subgraph_ranges;
}

/**
 * This method ONLY works when the following pre-conditions are satisfied:
 * 1. The node order of lite_graph_->all_nodes_ is consistent with the DelegateModel kernel sequence.
 *    This ensures that the kernel replacement in DelegateModel, based on the re-organizing info from lite_graph_,
 *    is correct.
 * 2. The node indices of lite_graph_->sub_graphs_[0]->node_indices_ increase monotonically from 0 to size - 1.
 */
Status NNRTDelegate::CreateLiteGraphForNNRTSubgraph(const std::vector<NNRTOpRange> &nnrt_op_ranges,
                                                    std::vector<LiteGraph *> *sub_lite_graphs) {
  MS_LOG(INFO) << "Start creating LiteGraph for NNRT subgraph";
  for (const auto &op_range : nnrt_op_ranges) {
    MS_LOG(INFO) << "Process op range: [" << op_range.begin_index_ << ", " << op_range.end_index_ << ")";
    LiteGraph *sub_lite_graph = new (std::nothrow) LiteGraph;
    if (sub_lite_graph == nullptr) {
      MS_LOG(ERROR) << "Allocate LiteGraph failed";
      return kLiteError;
    }
    sub_lite_graph->name_ = lite_graph_->name_;
    sub_lite_graph->version_ = lite_graph_->version_;

    auto sub_graph = new (std::nothrow) LiteGraph::SubGraph;
    if (sub_graph == nullptr) {
      MS_LOG(ERROR) << "Allocate SubGraph failed";
      return kLiteError;
    }
    sub_graph->name_ = lite_graph_->name_;
    sub_lite_graph->sub_graphs_.push_back(sub_graph);

    // deal with all_nodes
    MS_LOG(INFO) << "Assemble all_nodes...";
    int new_node_index = 0;
    std::map<uint32_t, schema::Tensor *> in_tensor_index_map;
    std::map<uint32_t, schema::Tensor *> out_tensor_index_map;
    for (size_t index = op_range.begin_index_; index < op_range.end_index_; index++) {
      LiteGraph::Node *node = new (std::nothrow) LiteGraph::Node;
      if (node == nullptr) {
        MS_LOG(ERROR) << "Allocate Node failed";
        return kLiteError;
      }
      *node = *lite_graph_->all_nodes_[index];
      sub_lite_graph->all_nodes_.push_back(node);
      sub_graph->node_indices_.push_back(new_node_index++);

      for (auto i : node->input_indices_) {
        in_tensor_index_map.emplace(i, lite_graph_->all_tensors_[i]);
      }
      for (auto i : node->output_indices_) {
        out_tensor_index_map.emplace(i, lite_graph_->all_tensors_[i]);
      }
    }

    // deal with all_tensors
    MS_LOG(INFO) << "Assemble all_tensors...";
    std::set<schema::Tensor *> tensors;
    for (auto iter : in_tensor_index_map) {
      tensors.emplace(iter.second);
    }
    for (auto iter : out_tensor_index_map) {
      tensors.emplace(iter.second);
    }

    uint32_t new_index = 0;
    std::map<schema::Tensor *, uint32_t> new_tensor_maps;
    for (auto tensor : tensors) {
      new_tensor_maps.emplace(tensor, new_index++);
    }

    sub_lite_graph->all_tensors_ = std::vector<schema::Tensor *>(tensors.begin(), tensors.end());

    // deal with every node's input/output indices
    MS_LOG(INFO) << "Set input/output indices of each node...";
    for (auto node : sub_lite_graph->all_nodes_) {
      for (auto &index : node->input_indices_) {
        index = new_tensor_maps.at(in_tensor_index_map.at(index));
      }
      for (auto &index : node->output_indices_) {
        index = new_tensor_maps.at(out_tensor_index_map.at(index));
      }
    }

    // deal with subgraph's input/output indices
    MS_LOG(INFO) << "Set input/output indices of each subgraph...";
    sub_graph->tensor_indices_ = std::vector<uint32_t>(tensors.size());
    std::iota(sub_graph->tensor_indices_.begin(), sub_graph->tensor_indices_.end(), 0U);

    for (auto iter : in_tensor_index_map) {
      auto new_tensor_index = new_tensor_maps[iter.second];
      MS_LOG(DEBUG) << "handle input: old: " << iter.first << ", new: " << new_tensor_index << std::endl;
      if (IsConstTensor(*iter.second)) {
        MS_LOG(DEBUG) << "- tensor: " << new_tensor_index << " is const." << std::endl;
        continue;
      }

      bool is_subgraph_input = true;
      for (auto node : sub_lite_graph->all_nodes_) {
        if (std::find(node->output_indices_.begin(), node->output_indices_.end(), new_tensor_index) !=
            node->output_indices_.end()) {
          is_subgraph_input = false;
          MS_LOG(DEBUG) << "- tensor: " << new_tensor_index << " is not subgraph input." << std::endl;
          break;
        }
      }
      if (is_subgraph_input) {
        sub_graph->input_indices_.push_back(new_tensor_index);
        MS_LOG(DEBUG) << "- select tensor: " << new_tensor_index << " as subgraph input." << std::endl;
      }
    }

    for (auto iter : out_tensor_index_map) {
      int new_tensor_index = new_tensor_maps.at(iter.second);
      MS_LOG(DEBUG) << "handle output: old: " << iter.first << ", new: " << new_tensor_index << std::endl;
      if (IsConstTensor(*iter.second)) {
        MS_LOG(DEBUG) << "- tensor: " << new_tensor_index << " is const." << std::endl;
        continue;
      }

      bool is_subgraph_output = false;
      for (size_t i = 0; i < lite_graph_->all_nodes_.size(); i++) {
        if ((i >= op_range.begin_index_) && (i < op_range.end_index_)) {
          continue;
        }
        auto node = lite_graph_->all_nodes_[i];
        if (std::find(node->input_indices_.begin(), node->input_indices_.end(), iter.first) !=
            node->input_indices_.end()) {  // As the input of a node which does not belong to the subgraph.
          is_subgraph_output = true;
          MS_LOG(DEBUG) << "- tensor: " << new_tensor_index << " is original subgraph output. node: "
                        << node->primitive_ << std::endl;
          break;
        }
      }
      bool is_graph_output = (std::find(lite_graph_->output_indices_.begin(), lite_graph_->output_indices_.end(),
                                        iter.first) != lite_graph_->output_indices_.end());
      if (is_graph_output) {
        MS_LOG(DEBUG) << "- tensor: " << new_tensor_index << " is graph output." << std::endl;
      }
      if (is_subgraph_output || is_graph_output) {
        sub_graph->output_indices_.push_back(new_tensor_index);
        MS_LOG(DEBUG) << "- select tensor: " << new_tensor_index << " as subgraph output." << std::endl;
      }
    }

    // deal with full-graph's input/output indices
    sub_lite_graph->input_indices_ = sub_graph->input_indices_;
    sub_lite_graph->output_indices_ = sub_graph->output_indices_;
    sub_lite_graphs->push_back(sub_lite_graph);
  }
  MS_LOG(INFO) << "Finished creating LiteGraph for NNRT subgraph";
  return kSuccess;
}

struct TensorLocation {
  uint32_t node_index;    // index of the node that the tensor belongs to.
  uint32_t tensor_index;  // position of the tensor within that node's input/output tensor list.
};

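// Configures and builds an NNCompilation: device, performance mode, priority, fp16, the
// optional compile cache, and (for HiAI Foundation offline inference) band mode and
// quantization config. OH_NN_OPERATION_FORBIDDEN is tolerated for the optional settings
// because some devices reject them.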
Status NNRTDelegate::InitNNCompilation(OH_NNCompilation *nn_compilation) const {
  auto ret_code = OH_NNCompilation_SetDevice(nn_compilation, nnrt_device_info_.device_id_);
  if (ret_code != OH_NN_SUCCESS) {
    MS_LOG(ERROR) << "NNCompilation set device id failed, ret: " << ret_code;
    return kLiteError;
  }
  ret_code = OH_NNCompilation_SetPerformanceMode(nn_compilation,
                                                 (OH_NN_PerformanceMode)(nnrt_device_info_.performance_mode_));
  if ((ret_code != OH_NN_SUCCESS) && (ret_code != OH_NN_OPERATION_FORBIDDEN)) {
    MS_LOG(ERROR) << "NNCompilation set performance mode failed, ret: " << ret_code;
    return kLiteError;
  }
  ret_code = OH_NNCompilation_SetPriority(nn_compilation, (OH_NN_Priority)(nnrt_device_info_.priority_));
  if ((ret_code != OH_NN_SUCCESS) && (ret_code != OH_NN_OPERATION_FORBIDDEN)) {
    MS_LOG(ERROR) << "NNCompilation set priority failed, ret: " << ret_code;
    return kLiteError;
  }
  ret_code = OH_NNCompilation_EnableFloat16(nn_compilation, nnrt_device_info_.enable_fp16_);
  if ((ret_code != OH_NN_SUCCESS) && (ret_code != OH_NN_OPERATION_FORBIDDEN)) {
    MS_LOG(ERROR) << "NNCompilation enable fp16 failed, ret: " << ret_code;
    return kLiteError;
  }

  if (!extension_options_.cache_path_.empty()) {  // Set the cache path only if the user explicitly set it.
    ret_code = OH_NNCompilation_SetCache(nn_compilation, extension_options_.cache_path_.c_str(),
                                         extension_options_.cache_version_);
    if ((ret_code != OH_NN_SUCCESS) && (ret_code != OH_NN_OPERATION_FORBIDDEN)) {
      MS_LOG(ERROR) << "NNCompilation set cache failed, ret: " << ret_code;
      return kLiteError;
    }
  }

#ifdef SUPPORT_NNRT_METAGRAPH
  if (hiai_handle_ != nullptr && IsKirinNPUWithOfflineInference()) {
    if (extension_options_.band_mode != mindspore::lite::HIAI_BANDMODE_UNSET) {
      ret_code = mindspore::lite::HMS_HiAIOptions_SetBandMode(nn_compilation, extension_options_.band_mode);
      if ((ret_code != OH_NN_SUCCESS) && (ret_code != OH_NN_OPERATION_FORBIDDEN)) {
        MS_LOG(ERROR) << "NNCompilation set BandMode failed, ret: " << ret_code;
        return kLiteError;
      }
    }

    if (extension_options_.is_optional_quant_setted) {
      if (extension_options_.quant_config == nullptr || extension_options_.quant_config_size <= 0) {
        MS_LOG(ERROR) << "NNCompilation set QuantConfig failed, input quant config is invalid, please make sure the "
                      << "buffer is not null and its size > 0.";
        return kLiteError;
      }
      ret_code = mindspore::lite::HMS_HiAIOptions_SetQuantConfig(nn_compilation, extension_options_.quant_config,
                                                                 extension_options_.quant_config_size);
      if ((ret_code != OH_NN_SUCCESS) && (ret_code != OH_NN_OPERATION_FORBIDDEN)) {
        MS_LOG(ERROR) << "NNCompilation set QuantConfig failed, ret: " << ret_code;
        return kLiteError;
      }
    }
  } else {
    MS_LOG(WARNING) << "HiAI Foundation handle is null or device is not in offline inference mode, "
                    << "skip HiAI-specific compilation options.";
  }
#endif

  ret_code = OH_NNCompilation_Build(nn_compilation);
  if (ret_code != OH_NN_SUCCESS) {
    MS_LOG(ERROR) << "Build NNCompilation failed, ret: " << ret_code;
    return kLiteError;
  }
  return kSuccess;
}

Status NNRTDelegate::CreateNNRTSubgraphKernels(DelegateModel<schema::Primitive> *model,
                                               const std::vector<LiteGraph *> &sub_lite_graphs,
                                               const std::vector<NNRTOpRange> &nnrt_subgraph_ranges,
                                               std::vector<NNRTModelKernel *> *nnrt_subgraph_kernels) {
  for (size_t i = 0; i < sub_lite_graphs.size(); i++) {
    auto sub_lite_graph = sub_lite_graphs[i];

    OH_NNModel *nn_model = OH_NNModel_Construct();
    auto ret = OH_NNModel_BuildFromLiteGraph(nn_model, sub_lite_graph, nullptr, 0);
    if (ret != OH_NN_SUCCESS) {
      MS_LOG(ERROR) << "Build NNModel failed, ret: " << ret;
      OH_NNModel_Destroy(&nn_model);
      return kLiteError;
    }

    OH_NNCompilation *nn_compilation = OH_NNCompilation_Construct(nn_model);
    if (nn_compilation == nullptr) {
      MS_LOG(ERROR) << "Construct NNCompilation failed";
      OH_NNModel_Destroy(&nn_model);
      return kLiteError;
    }
    MS_LOG(DEBUG) << "NNRTDelegate creates NNCompilation success.";

    auto ret_code = InitNNCompilation(nn_compilation);
    if (ret_code != kSuccess) {
      MS_LOG(ERROR) << "Init NNCompilation failed";
      OH_NNCompilation_Destroy(&nn_compilation);
      OH_NNModel_Destroy(&nn_model);
      return kLiteError;
    }

    OH_NNExecutor *nn_executor = OH_NNExecutor_Construct(nn_compilation);
    if (nn_executor == nullptr) {
      MS_LOG(ERROR) << "Construct NNExecutor failed";
      OH_NNCompilation_Destroy(&nn_compilation);
      OH_NNModel_Destroy(&nn_model);
      return kLiteError;
    }
    MS_LOG(DEBUG) << "NNRTDelegate creates NNExecutor success.";

    bool format_not_support = false;
    std::vector<MSTensor> in_tensors;
    for (auto index : sub_lite_graph->sub_graphs_[0]->input_indices_) {
      TensorLocation location;
      for (auto node_index : sub_lite_graph->sub_graphs_[0]->node_indices_) {
        auto node = sub_lite_graph->all_nodes_[node_index];
        auto iter = std::find(node->input_indices_.begin(), node->input_indices_.end(), index);
        if (iter != node->input_indices_.end()) {
          uint32_t tensor_index = iter - node->input_indices_.begin();
          location.node_index = node_index;
          location.tensor_index = tensor_index;
          MS_LOG(INFO) << "Found graph input index: " << index << " is the " << tensor_index
                       << "th input of the node " << node->primitive_;
          break;
        }
      }
      KernelIter kernel_iter = nnrt_subgraph_ranges[i].begin_iter_ + location.node_index;
      in_tensors.push_back((*kernel_iter)->inputs()[location.tensor_index]);
      if (in_tensors.back().format() != Format::NHWC) {
        format_not_support = true;
        break;
      }
    }

    std::vector<MSTensor> out_tensors;
    for (auto index : sub_lite_graph->sub_graphs_[0]->output_indices_) {
      TensorLocation location;
      for (auto node_index : sub_lite_graph->sub_graphs_[0]->node_indices_) {
        auto node = sub_lite_graph->all_nodes_[node_index];
        auto iter = std::find(node->output_indices_.begin(), node->output_indices_.end(), index);
        if (iter != node->output_indices_.end()) {
          uint32_t tensor_index = iter - node->output_indices_.begin();
          location.node_index = node_index;
          location.tensor_index = tensor_index;
          MS_LOG(INFO) << "Found graph output index: " << index << " is the " << tensor_index
                       << "th output of the node " << node->primitive_;
          break;
        }
      }
      KernelIter kernel_iter = nnrt_subgraph_ranges[i].begin_iter_ + location.node_index;
      out_tensors.push_back((*kernel_iter)->outputs()[location.tensor_index]);
      if (out_tensors.back().format() != Format::NHWC) {
        format_not_support = true;
        break;
      }
    }
    if (format_not_support) {
      MS_LOG(WARNING) << "Unsupported in/out tensor format, skip this subgraph";
      OH_NNExecutor_Destroy(&nn_executor);
      OH_NNCompilation_Destroy(&nn_compilation);
      OH_NNModel_Destroy(&nn_model);
      nnrt_subgraph_kernels->push_back(nullptr);
      continue;
    }

    auto nnrt_model_kernel =
        new (std::nothrow) NNRTModelKernel(nn_executor, nnrt_device_info_, in_tensors, out_tensors);
    if (nnrt_model_kernel == nullptr) {
      MS_LOG(ERROR) << "new NNRTModelKernel failed";
      OH_NNExecutor_Destroy(&nn_executor);
      OH_NNCompilation_Destroy(&nn_compilation);
      OH_NNModel_Destroy(&nn_model);
      return kLiteError;
    }
    nn_executor_list_.push_back(nn_executor);
    OH_NNCompilation_Destroy(&nn_compilation);
    OH_NNModel_Destroy(&nn_model);
    nnrt_subgraph_kernels->push_back(nnrt_model_kernel);
  }
  return kSuccess;
}

void NNRTDelegate::ReplaceNNRTKernelsInDelegateModel(DelegateModel<schema::Primitive> *model,
                                                     const std::vector<NNRTOpRange> &nnrt_subgraph_ranges,
                                                     const std::vector<NNRTModelKernel *> &nnrt_subgraph_kernels) {
  // Here we perform the replacement from back to front intentionally! If we replaced from front to back, the
  // kernel sequence would shrink and the later begin_iter_/end_iter_ might already be invalidated.
  for (int i = nnrt_subgraph_ranges.size() - 1; i >= 0; i--) {
    if (nnrt_subgraph_kernels[i] == nullptr) {
      continue;
    }
    auto from = nnrt_subgraph_ranges[i].begin_iter_;
    auto end = nnrt_subgraph_ranges[i].end_iter_;
    (void)model->Replace(from, end, nnrt_subgraph_kernels[i]);
    MS_LOG(INFO) << "Replace nnrt subgraph kernel in range: [" << (from - model->BeginKernelIterator())
                 << ", " << (end - model->BeginKernelIterator()) << ")";
  }
}

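// Binds every model input tensor to the executor, converting its shape, data type and
// quantization parameters to their NNRT counterparts.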
Status NNRTDelegate::PrepareInputs(DelegateModel<schema::Primitive> *model, OH_NNExecutor *oh_nn_executor) {
  auto input_tensors = model->inputs();
  for (size_t i = 0; i < input_tensors.size(); i++) {
    auto tensor = input_tensors[i];
    auto tensor_shape = tensor.Shape();
    auto tmp_quant_param = tensor.QuantParams();
    OH_NN_QuantParam *quant_param = nullptr;
    std::vector<uint32_t> bit_num;
    std::vector<double> scale;
    std::vector<int32_t> zero_point;
    if (!tmp_quant_param.empty()) {
      quant_param = new (std::nothrow) OH_NN_QuantParam;
      if (quant_param == nullptr) {
        MS_LOG(ERROR) << "new OH_NN_QuantParam failed.";
        return kLiteError;
      }
      for (auto qparam : tmp_quant_param) {
        bit_num.emplace_back(qparam.bit_num);
        scale.emplace_back(qparam.scale);
        zero_point.emplace_back(qparam.zero_point);
      }
      quant_param->quantCount = tmp_quant_param.size();
      quant_param->numBits = bit_num.data();
      quant_param->scale = scale.data();
      quant_param->zeroPoint = zero_point.data();
    }
    auto operand = new (std::nothrow) OH_NN_Tensor;
    if (operand == nullptr) {
      MS_LOG(ERROR) << "new OH_NN_Tensor failed";
      delete quant_param;
      return kLiteError;
    }
    operand->dataType = CastToNNRTDataType(tensor.DataType());
    operand->dimensionCount = tensor_shape.size();

    std::vector<int32_t> dimensions_list;
    for (auto shape : tensor_shape) {
      if (shape < INT32_MAX) {
        dimensions_list.emplace_back(static_cast<int32_t>(shape));
      } else {
        MS_LOG(ERROR) << "NNExecutor SetInput failed, tensor dimension is too large, max dim = " << INT32_MAX
                      << ", but got dimension = " << shape;
        delete operand;
        delete quant_param;
        return kLiteError;
      }
    }
    operand->dimensions = dimensions_list.data();
    operand->quantParam = quant_param;
    operand->type = OH_NN_TENSOR;
    OH_NN_ReturnCode ret_code =
        OH_NNExecutor_SetInput(oh_nn_executor, i, operand, tensor.MutableData(), tensor.DataSize());
    delete operand;
    delete quant_param;
    quant_param = nullptr;

    if (ret_code != OH_NN_SUCCESS) {
      MS_LOG(ERROR) << "NNExecutor SetInput failed, current input tensor is " << tensor.Name()
                    << ", OH_NN_ReturnCode = " << ret_code;
      return kLiteError;
    }
  }
  return kSuccess;
}

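// Maps a MindSpore Lite data type to the corresponding NNRT data type;
// unsupported types map to OH_NN_UNKNOWN.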
OH_NN_DataType NNRTDelegate::CastToNNRTDataType(DataType data_type) {
  const std::unordered_map<DataType, OH_NN_DataType> kDataTypeMap = {
      {DataType::kNumberTypeBool, OH_NN_BOOL},
      {DataType::kNumberTypeInt8, OH_NN_INT8},
      {DataType::kNumberTypeInt16, OH_NN_INT16},
      {DataType::kNumberTypeInt32, OH_NN_INT32},
      {DataType::kNumberTypeInt64, OH_NN_INT64},
      {DataType::kNumberTypeUInt8, OH_NN_UINT8},
      {DataType::kNumberTypeUInt16, OH_NN_UINT16},
      {DataType::kNumberTypeUInt32, OH_NN_UINT32},
      {DataType::kNumberTypeUInt64, OH_NN_UINT64},
      {DataType::kNumberTypeFloat16, OH_NN_FLOAT16},
      {DataType::kNumberTypeFloat32, OH_NN_FLOAT32},
      {DataType::kNumberTypeFloat64, OH_NN_FLOAT64},
  };

  auto iter = kDataTypeMap.find(data_type);
  if (iter == kDataTypeMap.end()) {
    return OH_NN_UNKNOWN;
  }
  return iter->second;
}

Status NNRTDelegate::PrepareOutputs(DelegateModel<schema::Primitive> *model, OH_NNExecutor *oh_nn_executor) {
  auto output_tensors = model->outputs();
  for (size_t i = 0; i < output_tensors.size(); i++) {
    auto tensor = output_tensors[i];
    OH_NN_ReturnCode ret_code = OH_NNExecutor_SetOutput(oh_nn_executor, i, tensor.MutableData(), tensor.DataSize());
    if (ret_code != OH_NN_SUCCESS) {
      MS_LOG(ERROR) << "NNExecutor SetOutput failed, current out tensor is " << tensor.Name()
                    << ", OH_NN_ReturnCode = " << ret_code;
      return kLiteError;
    }
  }
  return kSuccess;
}

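// Serializes a runtime Tensor back into a flatbuffer schema::Tensor, preserving the metadata
// of the original schema tensor. The backing buffer is tracked in
// dequant_schema_tensors_buffer_map_ so it can be freed in the destructor.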
schema::Tensor *NNRTDelegate::TensorToSchemaTensor(Tensor *lite_tensor, schema::Tensor *schema_tensor) {
  flatbuffers::FlatBufferBuilder fbb(1024);
  auto shape = lite_tensor->shape();
  std::vector<int32_t> dim_vec(shape.begin(), shape.end());

  auto quant_params = lite_tensor->quant_params();
  std::vector<flatbuffers::Offset<mindspore::schema::QuantParam>> quant_vec;
  quant_vec.reserve(quant_params.size());
  for (auto q_param : quant_params) {
    quant_vec.emplace_back(schema::CreateQuantParam(fbb, q_param.scale, q_param.zeroPoint, 0, 0, true, q_param.bitNum));
  }
  auto quant_clusters = lite_tensor->quant_clusters();

  auto external_data = schema_tensor->externalData();
  std::vector<flatbuffers::Offset<mindspore::schema::ExternalData>> external_data_vec;
  if (external_data != nullptr) {
    for (auto ed : *external_data) {
      external_data_vec.emplace_back(
          schema::CreateExternalDataDirect(fbb, ed->checkSum()->c_str(), ed->location()->c_str(), 0, ed->length()));
    }
  }
  uint8_t *data_src = reinterpret_cast<uint8_t *>(lite_tensor->data());
  std::vector<uint8_t> data_vec(data_src, data_src + lite_tensor->Size());
  auto tensor_offset = schema::CreateTensorDirect(fbb, schema_tensor->nodeType(), lite_tensor->data_type(), &dim_vec,
                                                  schema_tensor->format(), 0, 0, &data_vec, &quant_vec,
                                                  &quant_clusters, schema_tensor->name()->c_str(),
                                                  schema_tensor->enableHuffmanCode(),
                                                  mindspore::schema::WeightQuantCompressType_NONE, &external_data_vec);
  fbb.Finish(tensor_offset);

  auto buf = fbb.GetBufferPointer();
  if (buf == nullptr) {
    MS_LOG(ERROR) << "GetBufferPointer return nullptr";
    fbb.Clear();
    return nullptr;
  }
  size_t byte_num = fbb.GetSize();
  auto tensor_buf = reinterpret_cast<char *>(malloc(byte_num));
  if (tensor_buf == nullptr) {
    MS_LOG(ERROR) << "malloc tensor_buf failed";
    fbb.Clear();
    return nullptr;
  }
  memcpy(tensor_buf, buf, byte_num);
  auto tensor = flatbuffers::GetRoot<schema::Tensor>(tensor_buf);
  fbb.Clear();
  if (tensor != nullptr) {
    // Track tensor_buf so it can be freed later.
    auto iter = dequant_schema_tensors_buffer_map_.find(const_cast<schema::Tensor *>(tensor));
    if (iter != dequant_schema_tensors_buffer_map_.end()) {
      MS_LOG(ERROR) << "schema tensor is duplicated.";
      free(tensor_buf);
      return nullptr;
    }
    dequant_schema_tensors_buffer_map_[const_cast<schema::Tensor *>(tensor)] = tensor_buf;
  }
  return const_cast<schema::Tensor *>(tensor);
}

int NNRTDelegate::DequantNodeInputs(LiteGraph::Node *node) {
  auto in_size = node->input_indices_.size();
  int ret = RET_OK;
  for (size_t i = 0; i < in_size; i++) {
    auto tensor_index = node->input_indices_[i];
    auto *src_tensor = lite_graph_->all_tensors_[tensor_index];
    auto input = dequant_src_tensors_->at(tensor_index);
    if (!input->IsConst() || !(src_tensor->dataType() == kNumberTypeInt8 ||
        src_tensor->dataType() == kNumberTypeInt16 || src_tensor->dataType() == kNumberTypeInt32)) {
      continue;
    }
    auto dst_tensor = TensorToSchemaTensor(input, src_tensor);
    if (dst_tensor != nullptr) {
      dequant_schema_tensors_.emplace(tensor_index, dst_tensor);
      replaced_schema_tensors_.emplace_back(src_tensor);
    } else {
      MS_LOG(ERROR) << "create dequant schema tensor failed, node: " << node->name_
                    << ", tensor_index: " << tensor_index;
      ret = RET_ERROR;
      break;
    }
  }
  return ret;
}

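// Replaces the quantized constant inputs of a weight-quantized node with schema tensors
// serialized from the runtime tensors in dequant_src_tensors_ (expected to hold the
// already-dequantized data).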
Status NNRTDelegate::DequantLiteGraph(LiteGraph *lite_graph) {
  for (auto node_index : lite_graph->sub_graphs_[0]->node_indices_) {
    auto node = lite_graph->all_nodes_[node_index];

    if (node->quant_type_ != static_cast<int>(schema::QuantType_QUANT_WEIGHT)) {
      continue;
    }
    auto ret = DequantNodeInputs(node);
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "Dequant node failed: " << ret << ", node_name: " << node->name_;
      for (auto iter : dequant_schema_tensors_) {
        delete iter.second;
        iter.second = nullptr;
      }
      return kLiteNotSupport;
    }
    node->quant_type_ = schema::QuantType_QUANT_NONE;
  }
  for (auto iter : dequant_schema_tensors_) {
    lite_graph_->all_tensors_[iter.first] = iter.second;
  }
  return kSuccess;
}

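// Dequantizes every QUANT_WEIGHT node of the LiteGraph so that NNRT receives non-quantized
// weights, resets the node's quant type, then swaps the new schema tensors into all_tensors_.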
void NNRTDelegate::ShallowCopyLiteGraph(const lite::LiteGraph &lite_graph) {
  std::vector<LiteGraph::Node *> node_list;
  node_list.reserve(lite_graph.all_nodes_.size());
  // copy nodes
  for (auto node : lite_graph.all_nodes_) {
    auto new_node = new (std::nothrow) LiteGraph::Node;
    if (new_node == nullptr) {
      MS_LOG(ERROR) << "new LiteGraph::Node failed.";
      return;
    }
    new_node->name_ = node->name_;
    new_node->op_type_ = node->op_type_;
    new_node->node_type_ = node->node_type_;
    new_node->primitive_ = node->primitive_;
    new_node->base_operator_ = node->base_operator_;
    new_node->input_indices_ = node->input_indices_;
    new_node->output_indices_ = node->output_indices_;
    new_node->quant_type_ = node->quant_type_;
    new_node->device_type_ = node->device_type_;
    node_list.emplace_back(new_node);
  }
  // copy subgraphs
  std::vector<LiteGraph::SubGraph *> subgraph_list;
  for (auto subgraph : lite_graph.sub_graphs_) {
    auto new_subgraph = new (std::nothrow) LiteGraph::SubGraph;
    if (new_subgraph == nullptr) {
      MS_LOG(ERROR) << "new LiteGraph::SubGraph failed.";
      return;
    }
    new_subgraph->name_ = subgraph->name_;
    new_subgraph->input_indices_ = subgraph->input_indices_;
    new_subgraph->output_indices_ = subgraph->output_indices_;
    new_subgraph->node_indices_ = subgraph->node_indices_;
    subgraph_list.emplace_back(new_subgraph);
  }
  for (auto tensor : lite_graph.all_tensors_) {
    Status ret = lite::CheckTensorSupported(static_cast<const schema::Tensor *>(tensor));
    if (ret == kLiteError) {
      MS_LOG(ERROR) << "tensor supported check failed.";
      return;
    }
  }

  lite_graph_ = new (std::nothrow) lite::LiteGraph();
  if (lite_graph_ == nullptr) {
    MS_LOG(ERROR) << "new LiteGraph failed.";
    return;
  }

  lite_graph_->name_ = lite_graph.name_;
  lite_graph_->version_ = lite_graph.version_;
  lite_graph_->input_indices_ = lite_graph.input_indices_;
  lite_graph_->output_indices_ = lite_graph.output_indices_;
  lite_graph_->all_tensors_ = lite_graph.all_tensors_;
  lite_graph_->all_nodes_ = node_list;
  lite_graph_->sub_graphs_ = subgraph_list;
  MS_LOG(INFO) << "ShallowCopyLiteGraph success.";
}

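// Frees a LiteGraph created by ShallowCopyLiteGraph. Nodes and subgraphs are deleted;
// tensors are not freed here because they are shared with the original graph.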
void NNRTDelegate::FreeLiteGraph(lite::LiteGraph **liteGraph) {
  if (liteGraph != nullptr && *liteGraph != nullptr) {
    MS_LOG(INFO) << "start to free LiteGraph.";
    auto graph = *liteGraph;
    graph->name_.clear();
    graph->input_indices_.clear();
    graph->output_indices_.clear();
    MS_LOG(INFO) << "Destroying nodes.";
    // nodes
    for (size_t idx = 0; idx < graph->all_nodes_.size(); idx++) {
      if (graph->all_nodes_[idx] != nullptr) {
        delete graph->all_nodes_[idx];
        graph->all_nodes_[idx] = nullptr;
      }
    }
    MS_LOG(INFO) << "Destroying subgraphs.";
    // subgraphs
    for (size_t idx = 0; idx < graph->sub_graphs_.size(); idx++) {
      if (graph->sub_graphs_[idx] != nullptr) {
        delete graph->sub_graphs_[idx];
        graph->sub_graphs_[idx] = nullptr;
      }
    }
    // graph
    delete graph;
    *liteGraph = nullptr;
  } else {
    MS_LOG(WARNING) << "nnrt_lite_graph is nullptr, no need to free.";
  }
}

NNRTDelegate::~NNRTDelegate() {
  for (size_t i = 0; i < nn_executor_list_.size(); i++) {
    if (nn_executor_list_[i] != nullptr) {
      MS_LOG(INFO) << "start NNExecutor Destroy.";
      OH_NNExecutor_Destroy(&(nn_executor_list_[i]));
      MS_LOG(INFO) << "Destroy NNExecutor Finish.";
    }
  }
  if (lite_graph_ != nullptr) {
    MS_LOG(ERROR) << "Delete NNRTDelegate.";
  }
  for (auto iter : dequant_schema_tensors_buffer_map_) {
    if (iter.second != nullptr) {
      free(iter.second);
      iter.second = nullptr;
    }
  }
  dequant_schema_tensors_buffer_map_.clear();
}
}  // namespace lite
}  // namespace mindspore