• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1From 3793994296c2ede3f79544d613acd8f6600ec9fb Mon Sep 17 00:00:00 2001
2From: chengfeng27 <chengfeng27@huawei.com>
3Date: Fri, 7 Jun 2024 15:31:09 +0800
4Subject: fix lite_graph dequant crash
5
6---
7 .../delegate/nnrt/checker/primitive_check.cc  | 115 -----------
8 .../delegate/nnrt/checker/primitive_check.h   |   1 -
9 .../litert/delegate/nnrt/nnrt_allocator.cc    |  64 +++---
10 .../src/litert/delegate/nnrt/nnrt_allocator.h |  20 +-
11 .../src/litert/delegate/nnrt/nnrt_delegate.cc |  32 +--
12 .../src/litert/delegate/nnrt/nnrt_delegate.h  |   1 +
13 .../litert/delegate/nnrt/nnrt_model_kernel.cc | 190 ++++++++++--------
14 .../litert/delegate/nnrt/nnrt_model_kernel.h  |  17 +-
15 mindspore/lite/src/tensor.h                   |   2 +
16 9 files changed, 186 insertions(+), 256 deletions(-)
17
18diff --git a/mindspore/lite/src/litert/delegate/nnrt/checker/primitive_check.cc b/mindspore/lite/src/litert/delegate/nnrt/checker/primitive_check.cc
19index 6b191c8e..67d60f1b 100644
20--- a/mindspore/lite/src/litert/delegate/nnrt/checker/primitive_check.cc
21+++ b/mindspore/lite/src/litert/delegate/nnrt/checker/primitive_check.cc
22@@ -7,121 +7,6 @@
23 #include "src/common/utils.h"
24 namespace mindspore {
25 namespace lite {
26-
27-Status CheckPrimitiveSupported(const schema::Primitive *primitive) {
28-  if (primitive != nullptr) {
29-    auto prim = primitive;
30-    auto type = prim->value_type();
31-    switch (type) {
32-      case schema::PrimitiveType_Activation:
33-        return mindspore::kSuccess;
34-      case schema::PrimitiveType_AddFusion:
35-        return mindspore::kSuccess;
36-      case schema::PrimitiveType_ArgMaxFusion:
37-        return mindspore::kSuccess;
38-      case schema::PrimitiveType_AvgPoolFusion:
39-        return mindspore::kSuccess;
40-      case schema::PrimitiveType_BatchToSpaceND:
41-        return mindspore::kSuccess;
42-      case schema::PrimitiveType_BiasAdd:
43-        return mindspore::kSuccess;
44-      case schema::PrimitiveType_Cast:
45-        return mindspore::kSuccess;
46-      case schema::PrimitiveType_Concat:
47-        return mindspore::kSuccess;
48-      case schema::PrimitiveType_Conv2DFusion:
49-        return mindspore::kSuccess;
50-      case schema::PrimitiveType_Conv2dTransposeFusion:
51-        return mindspore::kSuccess;
52-      case schema::PrimitiveType_DivFusion:
53-        return mindspore::kSuccess;
54-      case schema::PrimitiveType_Eltwise:
55-        return mindspore::kSuccess;
56-      case schema::PrimitiveType_ExpandDims:
57-        return mindspore::kSuccess;
58-      case schema::PrimitiveType_Fill:
59-        return mindspore::kSuccess;
60-      case schema::PrimitiveType_FullConnection:
61-        return mindspore::kSuccess;
62-      case schema::PrimitiveType_FusedBatchNorm:
63-        return mindspore::kSuccess;
64-      case schema::PrimitiveType_Gather:
65-        return mindspore::kSuccess;
66-      case schema::PrimitiveType_LayerNormFusion:
67-        return mindspore::kSuccess;
68-      case schema::PrimitiveType_LessEqual:
69-        return mindspore::kSuccess;
70-      case schema::PrimitiveType_MatMulFusion:
71-        return mindspore::kSuccess;
72-      case schema::PrimitiveType_Maximum:
73-        return mindspore::kSuccess;
74-      case schema::PrimitiveType_MaxPoolFusion:
75-        return mindspore::kSuccess;
76-      case schema::PrimitiveType_MulFusion:
77-        return mindspore::kSuccess;
78-      case schema::PrimitiveType_OneHot:
79-        return mindspore::kSuccess;
80-      case schema::PrimitiveType_PadFusion:
81-        return mindspore::kSuccess;
82-      case schema::PrimitiveType_PowFusion:
83-        return mindspore::kSuccess;
84-      case schema::PrimitiveType_PReLUFusion:
85-        return mindspore::kSuccess;
86-      case schema::PrimitiveType_QuantDTypeCast:
87-        return mindspore::kSuccess;
88-      case schema::PrimitiveType_ReduceFusion:
89-        return mindspore::kSuccess;
90-      case schema::PrimitiveType_Reshape:
91-        return mindspore::kSuccess;
92-      case schema::PrimitiveType_Resize:
93-        return mindspore::kSuccess;
94-      case schema::PrimitiveType_Rsqrt:
95-        return mindspore::kSuccess;
96-      case schema::PrimitiveType_ScaleFusion:
97-        return mindspore::kSuccess;
98-      case schema::PrimitiveType_Shape:
99-        return mindspore::kSuccess;
100-      case schema::PrimitiveType_SliceFusion:
101-        return mindspore::kSuccess;
102-      case schema::PrimitiveType_Softmax:
103-        return mindspore::kSuccess;
104-      case schema::PrimitiveType_SpaceToBatchND:
105-        return mindspore::kSuccess;
106-      case schema::PrimitiveType_Split:
107-        return mindspore::kSuccess;
108-      case schema::PrimitiveType_Sqrt:
109-        return mindspore::kSuccess;
110-      case schema::PrimitiveType_SquaredDifference:
111-        return mindspore::kSuccess;
112-      case schema::PrimitiveType_Squeeze:
113-        return mindspore::kSuccess;
114-      case schema::PrimitiveType_Stack:
115-        return mindspore::kSuccess;
116-      case schema::PrimitiveType_StridedSlice:
117-        return mindspore::kSuccess;
118-      case schema::PrimitiveType_SubFusion:
119-        return mindspore::kSuccess;
120-      case schema::PrimitiveType_TileFusion:
121-        return mindspore::kSuccess;
122-      case schema::PrimitiveType_TopKFusion:
123-        return mindspore::kSuccess;
124-      case schema::PrimitiveType_Transpose:
125-        return mindspore::kSuccess;
126-      case schema::PrimitiveType_Unsqueeze:
127-        return mindspore::kSuccess;
128-      case schema::PrimitiveType_Custom:
129-        return mindspore::kSuccess;
130-      default: {
131-        MS_LOG(WARNING) << "No primitive type :" << (int)(type);
132-        return mindspore::kLiteSuccessExit;
133-      }
134-    }
135-    return mindspore::kSuccess;
136-  } else {
137-    MS_LOG(ERROR) << "primitive is nullptr.";
138-    return mindspore::kLiteError;
139-  }
140-}
141 namespace {
142 bool NeedBitUppackCheck(const schema::Tensor &src_tensor) {
143   if (src_tensor.enableHuffmanCode()) {
144diff --git a/mindspore/lite/src/litert/delegate/nnrt/checker/primitive_check.h b/mindspore/lite/src/litert/delegate/nnrt/checker/primitive_check.h
145index dbdd812c..46b812c0 100644
146--- a/mindspore/lite/src/litert/delegate/nnrt/checker/primitive_check.h
147+++ b/mindspore/lite/src/litert/delegate/nnrt/checker/primitive_check.h
148@@ -4,7 +4,6 @@
149 #include "include/api/status.h"
150 namespace mindspore {
151 namespace lite {
152-Status CheckPrimitiveSupported(const schema::Primitive *primitive);
153 Status CheckTensorSupported(const schema::Tensor *primitive);
154 }  // namespace lite
155 }  // namespace mindspore
156diff --git a/mindspore/lite/src/litert/delegate/nnrt/nnrt_allocator.cc b/mindspore/lite/src/litert/delegate/nnrt/nnrt_allocator.cc
157index f79c1682..b38fff62 100644
158--- a/mindspore/lite/src/litert/delegate/nnrt/nnrt_allocator.cc
159+++ b/mindspore/lite/src/litert/delegate/nnrt/nnrt_allocator.cc
160@@ -21,7 +21,6 @@
161 #include <mutex>
162 #include "src/litert/delegate/nnrt/nnrt_allocator.h"
163 #include "src/common/log.h"
164-#include "interfaces/kits/c/neural_network_runtime/neural_network_runtime.h"
165
166 namespace mindspore {
167 namespace lite {
168@@ -29,23 +28,17 @@ NNRTAllocator::~NNRTAllocator() {
169   std::lock_guard<std::mutex> locker(mutex_);
170   for (auto &it : allocated_list_) {
171     auto membuf = it.second;
172-    if (memory_category_ == NNRT_INPUT) {
173-      OH_NNExecutor_DestroyInputMemory(executor_, index_, &(membuf->memory_));
174-    } else {
175-      OH_NNExecutor_DestroyOutputMemory(executor_, index_, &(membuf->memory_));
176-    }
177-    free(membuf);
178+    OH_NNTensor_Destroy(&membuf->tensor_);
179+    OH_NNTensorDesc_Destroy(&membuf->tensor_desc_);
180+    delete membuf;
181   }
182   allocated_list_.clear();
183
184   for (auto &it : free_list_) {
185     auto membuf = it.second;
186-    if (memory_category_ == NNRT_INPUT) {
187-      OH_NNExecutor_DestroyInputMemory(executor_, index_, &(membuf->memory_));
188-    } else {
189-      OH_NNExecutor_DestroyOutputMemory(executor_, index_, &(membuf->memory_));
190-    }
191-    free(membuf);
192+    OH_NNTensor_Destroy(&membuf->tensor_);
193+    OH_NNTensorDesc_Destroy(&membuf->tensor_desc_);
194+    delete membuf;
195   }
196   free_list_.clear();
197 }
198@@ -57,8 +50,8 @@ void *NNRTAllocator::Malloc(size_t size) {
199     auto membuf = iter->second;
200     membuf->ref_count_ = 0;
201     (void)free_list_.erase(iter);
202-    allocated_list_[membuf->memory_->data] = membuf;
203-    return membuf->memory_->data;
204+    allocated_list_[membuf->data] = membuf;
205+    return membuf->data;
206   }
207
208   auto membuf = new (std::nothrow) MemBuf();
209@@ -66,30 +59,36 @@ void *NNRTAllocator::Malloc(size_t size) {
210     MS_LOG(ERROR) << "new Membuf failed.";
211     return nullptr;
212   }
213-
214   membuf->ref_count_ = 0;
215   if (memory_category_ == NNRT_INPUT) {
216-    membuf->memory_ = OH_NNExecutor_AllocateInputMemory(executor_, index_, size);
217+    membuf->tensor_desc_ = OH_NNExecutor_CreateInputTensorDesc(executor_, index_);
218   } else {
219-    membuf->memory_ = OH_NNExecutor_AllocateOutputMemory(executor_, index_, size);
220+    membuf->tensor_desc_ = OH_NNExecutor_CreateOutputTensorDesc(executor_, index_);
221   }
222-
223-  if (membuf->memory_ == nullptr) {
224-    MS_LOG(ERROR) << "malloc OH_NN_Memory return nullptr";
225+  if (membuf->tensor_desc_ == nullptr) {
226+    MS_LOG(ERROR) << "OH_NNExecutor_CreateInput/OutputTensorDesc failed, i = " << index_;
227+    delete membuf;
228+    return nullptr;
229+  }
230+  membuf->tensor_ = OH_NNTensor_CreateWithSize(device_id_, membuf->tensor_desc_, size);
231+  if (membuf->tensor_ == nullptr) {
232+    MS_LOG(ERROR) << "OH_NNTensor_CreateWithSize failed, i = " << index_;
233+    OH_NNTensorDesc_Destroy(&membuf->tensor_desc_);
234+    delete membuf;
235     return nullptr;
236   }
237-  if (membuf->memory_->data == nullptr) {
238-    MS_LOG(ERROR) << "malloc OH_NN_Memory return nullptr";
239-    if (memory_category_ == NNRT_INPUT) {
240-      OH_NNExecutor_DestroyInputMemory(executor_, index_, &(membuf->memory_));
241-    } else {
242-      OH_NNExecutor_DestroyOutputMemory(executor_, index_, &(membuf->memory_));
243-    }
244+  membuf->data = OH_NNTensor_GetDataBuffer(membuf->tensor_);
245+  if (membuf->data == nullptr) {
246+    MS_LOG(ERROR) << "OH_NNTensor_GetDataBuffer failed, i = " << index_;
247+    OH_NNTensor_Destroy(&membuf->tensor_);
248+    OH_NNTensorDesc_Destroy(&membuf->tensor_desc_);
249+    delete membuf;
250     return nullptr;
251   }
252
253-  allocated_list_[membuf->memory_->data] = membuf;
254-  return membuf->memory_->data;
255+  membuf->size = size;
256+  allocated_list_[membuf->data] = membuf;
257+  return membuf->data;
258 }
259
260 void NNRTAllocator::Free(void *ptr) {
261@@ -105,12 +104,12 @@ void NNRTAllocator::Free(void *ptr) {
262   auto membuf = iter->second;
263   membuf->ref_count_ = 0;
264   (void)allocated_list_.erase(iter);
265-  (void)free_list_.insert(std::make_pair(membuf->memory_->length, membuf));
266+  (void)free_list_.insert(std::make_pair(membuf->size, membuf));
267 }
268
269 int NNRTAllocator::RefCount(void *ptr) {
270   if (ptr == nullptr) {
271-    return -1;
272+    return NNRT_ALLOCATION;
273   }
274   std::lock_guard<std::mutex> locker(mutex_);
275   auto iter = allocated_list_.find(ptr);
276@@ -163,6 +162,5 @@ int NNRTAllocator::IncRefCount(void *ptr, int ref_count) {
277   }
278   return -1;
279 }
280-
281 }  // namespace lite
282 }  // namespace mindspore
283\ No newline at end of file
284diff --git a/mindspore/lite/src/litert/delegate/nnrt/nnrt_allocator.h b/mindspore/lite/src/litert/delegate/nnrt/nnrt_allocator.h
285index f6721369..52e6def7 100644
286--- a/mindspore/lite/src/litert/delegate/nnrt/nnrt_allocator.h
287+++ b/mindspore/lite/src/litert/delegate/nnrt/nnrt_allocator.h
288@@ -23,6 +23,9 @@
289 #include <map>
290 #include <mutex>
291 #include "include/api/allocator.h"
292+#include "src/tensor.h"
293+#include "interfaces/kits/c/neural_network_runtime/neural_network_runtime.h"
294+
295 struct OH_NN_Memory;
296 struct OH_NNExecutor;
297
298@@ -32,8 +35,8 @@ enum MemoryCategory { NNRT_INPUT, NNRT_OUTPUT };
299
300 class NNRTAllocator : public Allocator {
301  public:
302-  NNRTAllocator(OH_NNExecutor *executor, int index, MemoryCategory memory_category)
303-      : index_(index), memory_category_(memory_category), executor_(executor) {}
304+  NNRTAllocator(OH_NNExecutor *executor, int index, size_t device_id, MemoryCategory memory_category)
305+      : index_(index), device_id_(device_id), memory_category_(memory_category), executor_(executor) {}
306   ~NNRTAllocator() override;
307
308   void *Malloc(size_t size) override;
309@@ -42,14 +45,25 @@ class NNRTAllocator : public Allocator {
310   int SetRefCount(void *ptr, int ref_count) override;
311   int DecRefCount(void *ptr, int ref_count) override;
312   int IncRefCount(void *ptr, int ref_count) override;
313+  NN_Tensor *GetNNTensor(void *ptr) {
314+    auto iter = allocated_list_.find(ptr);
315+    if (iter != allocated_list_.end()) {
316+      return iter->second->tensor_;
317+    }
318+    return nullptr;
319+  }
320
321  private:
322   struct MemBuf {
323     std::atomic_int ref_count_{0};
324-    OH_NN_Memory *memory_{nullptr};
325+    NN_TensorDesc *tensor_desc_{nullptr};
326+    NN_Tensor *tensor_{nullptr};
327+    void *data{nullptr};
328+    size_t size{0};
329   };
330
331   int index_{0};
332+  size_t device_id_{0};
333   MemoryCategory memory_category_{NNRT_INPUT};
334   OH_NNExecutor *executor_{nullptr};
335   std::mutex mutex_;
336diff --git a/mindspore/lite/src/litert/delegate/nnrt/nnrt_delegate.cc b/mindspore/lite/src/litert/delegate/nnrt/nnrt_delegate.cc
337index d8450141..a949c910 100644
338--- a/mindspore/lite/src/litert/delegate/nnrt/nnrt_delegate.cc
339+++ b/mindspore/lite/src/litert/delegate/nnrt/nnrt_delegate.cc
340@@ -169,7 +169,7 @@ Status NNRTDelegate::CreateFullModelKernel(DelegateModel<schema::Primitive> *mod
341   }
342   OH_NNCompilation_Destroy(&nn_compilation);
343
344-  auto nnrt_model_kernel = new (std::nothrow)NNRTModelKernel(nn_executor, model->inputs(), model->outputs());
345+  auto nnrt_model_kernel = new (std::nothrow)NNRTModelKernel(nn_executor, nnrt_device_info_.device_id_, model->inputs(), model->outputs());
346   if (nnrt_model_kernel == nullptr) {
347     OH_NNExecutor_Destroy(&nn_executor);
348     MS_LOG(ERROR) << "new NNRTModelKernel failed";
349@@ -581,7 +581,7 @@ Status NNRTDelegate::CreateNNRTSubgraphKernels(DelegateModel<schema::Primitive>
350       continue ;
351     }
352
353-    auto nnrt_model_kernel = new (std::nothrow)NNRTModelKernel(nn_executor, in_tensors, out_tensors);
354+    auto nnrt_model_kernel = new (std::nothrow)NNRTModelKernel(nn_executor, nnrt_device_info_.device_id_, in_tensors, out_tensors);
355     if (nnrt_model_kernel == nullptr) {
356       MS_LOG(ERROR) << "new NNRTModelKernel failed";
357       return kLiteError;
358@@ -760,6 +760,15 @@ schema::Tensor *NNRTDelegate::TensorToSchemaTensor(Tensor *lite_tensor, schema::
359   memcpy(tensor_buf, buf, fbb.GetSize());
360   auto tensor = flatbuffers::GetRoot<schema::Tensor>(tensor_buf);
361   fbb.Clear();
362+  if (tensor != nullptr) {
363+    // Keep tensor_buf keyed by its schema tensor so ~NNRTDelegate() can free it.
364+    auto iter = dequant_schema_tensors_buffer_map_.find(const_cast<schema::Tensor *>(tensor));
365+    if (iter != dequant_schema_tensors_buffer_map_.end()) {
366+      MS_LOG(ERROR) << "schema tensor is duplicated.";
367+      return nullptr;
368+    }
369+    dequant_schema_tensors_buffer_map_[const_cast<schema::Tensor *>(tensor)] = tensor_buf;
370+  }
371   return const_cast<schema::Tensor *>(tensor);
372 }
373
374@@ -813,14 +822,6 @@ Status NNRTDelegate::DequantLiteGraph(LiteGraph *lite_graph) {
375 }
376
377 void NNRTDelegate::ShallowCopyLiteGraph(const lite::LiteGraph &lite_graph) {
378-  Status ret;
379-  for (auto node : lite_graph.all_nodes_) {
380-    ret = lite::CheckPrimitiveSupported(static_cast<const schema::Primitive *>(node->primitive_));
381-    if (ret == kLiteError) {
382-      MS_LOG(ERROR) << " primitive supported check failed.";
383-      return;
384-    }
385-  }
386   std::vector<LiteGraph::Node *> node_list;
387   node_list.reserve(lite_graph.all_nodes_.size());
388   // copy node
389@@ -856,7 +857,7 @@ void NNRTDelegate::ShallowCopyLiteGraph(const lite::LiteGraph &lite_graph) {
390     subgraph_list.emplace_back(new_subgraph);
391   }
392   for (auto tensor : lite_graph.all_tensors_) {
393-    ret = lite::CheckTensorSupported(static_cast<const schema::Tensor *>(tensor));
394+    Status ret = lite::CheckTensorSupported(static_cast<const schema::Tensor *>(tensor));
395     if (ret == kLiteError) {
396       MS_LOG(ERROR) << "tensor supported check failed.";
397       return;
398@@ -921,10 +922,13 @@ NNRTDelegate::~NNRTDelegate() {
399   if (lite_graph_ != nullptr) {
400     MS_LOG(ERROR) << "Delete NNRTDelegate.";
401   }
402-  for (auto iter : dequant_schema_tensors_) {
403-    delete iter.second;
404-    iter.second = nullptr;
405+  for (auto iter : dequant_schema_tensors_buffer_map_) {
406+    if (iter.second != nullptr) {
407+      free(iter.second);
408+      iter.second = nullptr;
409+    }
410   }
411+  dequant_schema_tensors_buffer_map_.clear();
412 }
413 }  // namespace lite
414 }  // namespace mindspore
415diff --git a/mindspore/lite/src/litert/delegate/nnrt/nnrt_delegate.h b/mindspore/lite/src/litert/delegate/nnrt/nnrt_delegate.h
416index 778553ef..db2f0ee7 100644
417--- a/mindspore/lite/src/litert/delegate/nnrt/nnrt_delegate.h
418+++ b/mindspore/lite/src/litert/delegate/nnrt/nnrt_delegate.h
419@@ -95,6 +95,7 @@ class NNRTDelegate : public Delegate {
420   std::vector<OH_NNExecutor *> nn_executor_list_;
421   std::vector<Tensor *> *dequant_src_tensors_;
422   std::map<uint32_t, schema::Tensor *> dequant_schema_tensors_;
423+  std::map<schema::Tensor *, void *> dequant_schema_tensors_buffer_map_;
424   std::vector<schema::Tensor *> replaced_schema_tensors_;
425 };
426 }  // namespace lite
427diff --git a/mindspore/lite/src/litert/delegate/nnrt/nnrt_model_kernel.cc b/mindspore/lite/src/litert/delegate/nnrt/nnrt_model_kernel.cc
428index f83632dd..2a66d133 100644
429--- a/mindspore/lite/src/litert/delegate/nnrt/nnrt_model_kernel.cc
430+++ b/mindspore/lite/src/litert/delegate/nnrt/nnrt_model_kernel.cc
431@@ -19,7 +19,7 @@
432 #include "litert/cxx_api/tensor/tensor_impl.h"
433 int mindspore::NNRTModelKernel::Prepare() {
434   for (size_t i = 0; i < inputs_.size(); i++) {
435-    auto nnrt_allocator = std::make_shared<lite::NNRTAllocator>(oh_nn_executor, i, lite::NNRT_INPUT);
436+    auto nnrt_allocator = std::make_shared<lite::NNRTAllocator>(oh_nn_executor, i, device_id_, lite::NNRT_INPUT);
437     if (nnrt_allocator == nullptr) {
438       MS_LOG(ERROR) << "Create NNRTAllocator failed";
439       return lite::RET_NULL_PTR;
440@@ -27,7 +27,7 @@ int mindspore::NNRTModelKernel::Prepare() {
441     inputs_[i].SetAllocator(nnrt_allocator);
442   }
443   for (size_t i = 0; i < outputs_.size(); i++) {
444-    auto nnrt_allocator = std::make_shared<lite::NNRTAllocator>(oh_nn_executor, i, lite::NNRT_OUTPUT);
445+    auto nnrt_allocator = std::make_shared<lite::NNRTAllocator>(oh_nn_executor, i, device_id_, lite::NNRT_OUTPUT);
446     if (nnrt_allocator == nullptr) {
447       MS_LOG(ERROR) << "Create NNRTAllocator failed";
448       return lite::RET_NULL_PTR;
449@@ -39,25 +39,33 @@ int mindspore::NNRTModelKernel::Prepare() {
450
451 int mindspore::NNRTModelKernel::Execute() {
452   MS_CHECK_TRUE_RET(this->outputs().empty() != true, lite::RET_ERROR);
453-  zero_copy_ = this->outputs()[Index0].allocator() != nullptr;
454+  zero_copy_ = IS_NNRT_ALLOCATOR(this->outputs()[Index0].allocator());
455
456+  if (!zero_copy_) {
457+    FreeNNTensor();
458+  }
459+  nn_input_tensors_.clear();
460+  nn_output_tensors_.clear();
461+  nn_input_tensor_descs_.clear();
462+  nn_output_tensor_descs_.clear();
463
464-  lite::STATUS ret_val = PrepareInputs();
465+  lite::STATUS ret_val = SetInputs();
466   if (ret_val != lite::RET_OK) {
467-    MS_LOG(ERROR) << "NNRTModelKernel PrepareInputs failed, STATUS is " << ret_val;
468+    MS_LOG(ERROR) << "NNRTModelKernel SetInputs failed, STATUS is " << ret_val;
469     return ret_val;
470   }
471-  ret_val = TransferOutputs();
472+  ret_val = SetOutputs();
473   if (ret_val != lite::RET_OK) {
474-    MS_LOG(ERROR) << "NNRTModelKernel TransferOutputs failed, STATUS is " << ret_val;
475+    MS_LOG(ERROR) << "NNRTModelKernel SetOutputs failed, STATUS is " << ret_val;
476     return ret_val;
477   }
478   MS_LOG(INFO) << "Running NNRtModel Kernel...";
479   OH_NN_ReturnCode ret_code;
480-  ret_code = OH_NNExecutor_Run(this->oh_nn_executor);
481+  ret_code = OH_NNExecutor_RunSync(oh_nn_executor, nn_input_tensors_.data(), nn_input_tensors_.size(),
482+                                   nn_output_tensors_.data(), nn_output_tensors_.size());
483
484   if (ret_code != OH_NN_SUCCESS) {
485-    MS_LOG(ERROR) << "NNExecutor Run failed, OH_NN_ReturnCode = " << ret_code;
486+    MS_LOG(ERROR) << "OH_NNExecutor_RunSync Run failed, OH_NN_ReturnCode = " << ret_code;
487     return lite::RET_ERROR;
488   }
489   MS_LOG(INFO) << "Run NNRtModel Kernel success.";
490@@ -120,97 +128,107 @@ OH_NN_DataType mindspore::NNRTModelKernel::ConvertDataType(mindspore::DataType d
491   }
492   return oh_data_type;
493 }
494-int mindspore::NNRTModelKernel::PrepareInputs() {
495-  auto input_tensors = this->inputs();
496-  for (size_t i = 0; i < input_tensors.size(); i++) {
497-    auto tensor = input_tensors[i];
498-    auto tensor_shape = tensor.Shape();
499-    auto tmp_quant_param = tensor.QuantParams();
500-    OH_NN_QuantParam *quant_param = nullptr;
501-    std::vector<uint32_t> bit_num;
502-    std::vector<double> scale;
503-    std::vector<int32_t> zero_point;
504-    if (!tmp_quant_param.empty()) {
505-      quant_param = (new (std::nothrow) OH_NN_QuantParam);
506-      if (quant_param == nullptr) {
507-        MS_LOG(ERROR) << "new OH_NN_QuantParam failed.";
508-        return lite::RET_NULL_PTR;
509-      }
510-      for (auto qparam : tmp_quant_param) {
511-        bit_num.emplace_back(qparam.bit_num);
512-        scale.emplace_back(qparam.scale);
513-        zero_point.emplace_back(qparam.zero_point);
514-      }
515-      quant_param->quantCount = tmp_quant_param.size();
516-      quant_param->numBits = bit_num.data();
517-      quant_param->scale = scale.data();
518-      quant_param->zeroPoint = zero_point.data();
519+
520+int mindspore::NNRTModelKernel::SetInputs() {
521+  if (!zero_copy_) {
522+    OH_NN_ReturnCode ret{OH_NN_FAILED};
523+    size_t nn_input_count = 0;
524+    ret = OH_NNExecutor_GetInputCount(oh_nn_executor, &nn_input_count);
525+    if (ret != OH_NN_SUCCESS) {
526+      MS_LOG(ERROR) << "OH_NNExecutor_GetInputCount failed.";
527+      return lite::RET_ERROR;
528     }
529-    auto oprend = new (std::nothrow) OH_NN_Tensor;
530-    if (oprend == nullptr) {
531-      MS_LOG(ERROR) << "new OH_NN_Tensor Failed";
532+    if (nn_input_count != inputs_.size()) {
533+      MS_LOG(ERROR) << "input count is not equal between ms and nnrt.";
534       return lite::RET_ERROR;
535     }
536-    oprend->dataType = ConvertDataType(tensor.DataType());
537-    oprend->dimensionCount = tensor_shape.size();
538-
539-    std::vector<int32_t> dimensions_list;
540-    for (auto shape : tensor_shape) {
541-      if (shape < INT32_MAX) {
542-        dimensions_list.emplace_back(static_cast<int32_t>(shape));
543-      } else {
544-        MS_LOG(ERROR) << "NNExecutor SetInput failed,tensor dimension is is too large, max dim = " << INT32_MAX
545-                      << ", but get dimension = " << shape;
546+    for (size_t i = 0; i < nn_input_count; i++) {
547+      NN_TensorDesc *tensor_desc_tmp = OH_NNExecutor_CreateInputTensorDesc(oh_nn_executor, i);
548+      if (tensor_desc_tmp == nullptr) {
549+        MS_LOG(ERROR) << "OH_NNExecutor_CreateInputTensorDesc failed, i = " << i;
550         return lite::RET_ERROR;
551       }
552+      nn_input_tensor_descs_.emplace_back(tensor_desc_tmp);
553+      NN_Tensor *tensor_tmp = OH_NNTensor_Create(device_id_, tensor_desc_tmp);
554+      if (tensor_tmp == nullptr) {
555+        MS_LOG(ERROR) << "OH_NNTensor_Create input failed, i = " << i;
556+        return lite::RET_ERROR;
557+      }
558+      nn_input_tensors_.emplace_back(tensor_tmp);
559+      void *nn_data = OH_NNTensor_GetDataBuffer(nn_input_tensors_[i]);
560+      size_t tensor_size;
561+      ret = OH_NNTensorDesc_GetByteSize(tensor_desc_tmp, &tensor_size);
562+      if (ret != OH_NN_SUCCESS || tensor_size != inputs_[i].DataSize()) {
563+        MS_LOG(ERROR) << "NN_Tensor size is not equal to MSTensor, i = " << i;
564+        return lite::RET_ERROR;
565+      }
566+      memcpy(nn_data, inputs_[i].MutableData(), inputs_[i].DataSize());
567     }
568-    oprend->dimensions = dimensions_list.data();
569-    oprend->quantParam = quant_param;
570-    oprend->type = OH_NN_TENSOR;
571-    MS_LOG_INFO << "input tensor: " << tensor.Name() << ", data: " << (void *)tensor.MutableData()
572-                << ", size: " << tensor.DataSize();
573-
574-    OH_NN_ReturnCode ret_code;
575-    if (zero_copy_) {
576-      OH_NN_Memory mem{tensor.MutableData(), tensor.DataSize()};
577-      ret_code = OH_NNExecutor_SetInputWithMemory(oh_nn_executor, i, oprend, &mem);
578-    } else {
579-      ret_code = OH_NNExecutor_SetInput(oh_nn_executor, i, oprend, tensor.MutableData(), tensor.DataSize());
580-    }
581-
582-    delete (oprend);
583-
584-    if (!tmp_quant_param.empty()) {
585-      free(quant_param);
586-      quant_param = nullptr;
587-    }
588-
589-    if (ret_code != OH_NN_SUCCESS) {
590-      MS_LOG(ERROR) << "NNExecutor SetInput failed, current input tensor is" << tensor.Name()
591-                    << "OH_NN_ReturnCode = " << ret_code;
592-      return lite::RET_ERROR;
593+  } else {
594+    for (size_t i = 0; i < inputs_.size(); i++) {
595+      void *data = inputs_[i].MutableData();
596+      NN_Tensor *tensor_tmp = reinterpret_cast<lite::NNRTAllocator *>(inputs_[i].allocator().get())->GetNNTensor(data);
597+      if (tensor_tmp == nullptr) {
598+        MS_LOG(ERROR) << "NNRTAllocator GetNNTensor failed, i = " << i;
599+        return lite::RET_ERROR;
600+      }
601+      nn_input_tensors_.emplace_back(tensor_tmp);
602     }
603   }
604-
605   return lite::RET_OK;
606 }
607-int mindspore::NNRTModelKernel::TransferOutputs() {
608-  auto output_tensors = this->outputs();
609-  for (size_t i = 0; i < output_tensors.size(); i++) {
610-    auto tensor = output_tensors[i];
611
612-    OH_NN_ReturnCode ret_code;
613-    if (zero_copy_) {
614-      OH_NN_Memory mem{tensor.MutableData(), tensor.DataSize()};
615-      ret_code = OH_NNExecutor_SetOutputWithMemory(oh_nn_executor, i, &mem);
616-    } else {
617-      ret_code = OH_NNExecutor_SetOutput(oh_nn_executor, i, tensor.MutableData(), tensor.DataSize());
618+int mindspore::NNRTModelKernel::SetOutputs() {
619+  if (!zero_copy_) {
620+    OH_NN_ReturnCode ret{OH_NN_FAILED};
621+    size_t nn_output_count = 0;
622+    ret = OH_NNExecutor_GetOutputCount(oh_nn_executor, &nn_output_count);
623+    if (ret != OH_NN_SUCCESS) {
624+      MS_LOG(ERROR) << "OH_NNExecutor_GetOutputCount failed.";
625+      return lite::RET_ERROR;
626     }
627-    if (ret_code != OH_NN_SUCCESS) {
628-      MS_LOG(ERROR) << "NNExecutor SetOutput failed, current out tensor is" << tensor.Name()
629-                    << ", OH_NN_ReturnCode = " << ret_code;
630+    if (nn_output_count != outputs_.size()) {
631+      MS_LOG(ERROR) << "output count is not equal between ms and nnrt.";
632       return lite::RET_ERROR;
633     }
634+    for (size_t i = 0; i < nn_output_count; i++) {
635+      NN_TensorDesc *tensor_desc_tmp = OH_NNExecutor_CreateOutputTensorDesc(oh_nn_executor, i);
636+      if (tensor_desc_tmp == nullptr) {
637+        MS_LOG(ERROR) << "OH_NNExecutor_CreateOutputTensorDesc failed, i = " << i;
638+        return lite::RET_ERROR;
639+      }
640+      nn_output_tensor_descs_.emplace_back(tensor_desc_tmp);
641+      NN_Tensor *tensor_tmp = OH_NNTensor_Create(device_id_, tensor_desc_tmp);
642+      if (tensor_tmp == nullptr) {
643+        MS_LOG(ERROR) << "OH_NNTensor_Create output failed, i = " << i;
644+        return lite::RET_ERROR;
645+      }
646+      nn_output_tensors_.emplace_back(tensor_tmp);
647+      auto data = OH_NNTensor_GetDataBuffer(nn_output_tensors_[i]);
648+      reinterpret_cast<LiteTensorImpl *>(outputs_[i].impl().get())->lite_tensor()->FreeData();
649+      outputs_[i].SetData(data, false);
650+    }
651+  } else {
652+    for (size_t i = 0; i < outputs_.size(); i++) {
653+      void *data = outputs_[i].MutableData();
654+      NN_Tensor *tensor_tmp = reinterpret_cast<lite::NNRTAllocator *>(outputs_[i].allocator().get())->GetNNTensor(data);
655+      if (tensor_tmp == nullptr) {
656+        MS_LOG(ERROR) << "NNRTAllocator GetNNTensor failed, i = " << i;
657+        return lite::RET_ERROR;
658+      }
659+      nn_output_tensors_.emplace_back(tensor_tmp);
660+    }
661   }
662   return lite::RET_OK;
663 }
664+
665+void mindspore::NNRTModelKernel::FreeNNTensor() {
666+  for (size_t i = 0; i < nn_input_tensors_.size(); i++) {
667+    OH_NNTensor_Destroy(&nn_input_tensors_[i]);
668+    OH_NNTensorDesc_Destroy(&nn_input_tensor_descs_[i]);
669+  }
670+  for (size_t i = 0; i < nn_output_tensors_.size(); i++) {
671+    OH_NNTensor_Destroy(&nn_output_tensors_[i]);
672+    OH_NNTensorDesc_Destroy(&nn_output_tensor_descs_[i]);
673+  }
674+}
675diff --git a/mindspore/lite/src/litert/delegate/nnrt/nnrt_model_kernel.h b/mindspore/lite/src/litert/delegate/nnrt/nnrt_model_kernel.h
676index 33df925c..40800a2a 100644
677--- a/mindspore/lite/src/litert/delegate/nnrt/nnrt_model_kernel.h
678+++ b/mindspore/lite/src/litert/delegate/nnrt/nnrt_model_kernel.h
679@@ -31,9 +31,9 @@ class NNRTModelKernel : public kernel::Kernel {
680    * Because nnr can't run single op, but the whole model. So we decide to make the whole model into one kernel.
681    * */
682  public:
683-  NNRTModelKernel(OH_NNExecutor *oh_nn_executor, const std::vector<mindspore::MSTensor> &inputs,
684+  NNRTModelKernel(OH_NNExecutor *oh_nn_executor, size_t device_id, const std::vector<mindspore::MSTensor> &inputs,
685                   const std::vector<mindspore::MSTensor> &outputs)
686-      : kernel::Kernel(inputs, outputs, nullptr, nullptr), oh_nn_executor(oh_nn_executor) {}
687+      : kernel::Kernel(inputs, outputs, nullptr, nullptr), device_id_(device_id), oh_nn_executor(oh_nn_executor) {}
688   int Prepare() override;
689   int Execute() override;
690   int ReSize() override {
691@@ -41,14 +41,23 @@ class NNRTModelKernel : public kernel::Kernel {
692     return lite::RET_ERROR;
693   };
694   OH_NN_DataType ConvertDataType(mindspore::DataType data_type);
695-  int PrepareInputs();
696-  int TransferOutputs();
697+  int SetInputs();
698+  int SetOutputs();
699+  void FreeNNTensor();
700   ~NNRTModelKernel() override {
701+    if (!zero_copy_) {
702+      FreeNNTensor();
703+    }
704     MS_LOG(INFO) << "NNRTModelKernel Destroy.";
705   }
706
707  protected:
708+  size_t device_id_;
709   OH_NNExecutor *oh_nn_executor = nullptr;
710+  std::vector<NN_Tensor *> nn_input_tensors_;
711+  std::vector<NN_TensorDesc *> nn_input_tensor_descs_;
712+  std::vector<NN_Tensor *> nn_output_tensors_;
713+  std::vector<NN_TensorDesc *> nn_output_tensor_descs_;
714
715  private:
716   bool zero_copy_{false};
717diff --git a/mindspore/lite/src/tensor.h b/mindspore/lite/src/tensor.h
718index f2eb4d1a..501e28e5 100644
719--- a/mindspore/lite/src/tensor.h
720+++ b/mindspore/lite/src/tensor.h
721@@ -38,10 +38,12 @@ namespace lite {
722 #define STATIC_ALLOCATION -271964
723 #define RUNTIME_REFCOUNT 0x9999
724 #define OPENCL_ALLOCATOR_REFCOUNT -10000
725+#define NNRT_ALLOCATION -10001
726 #define IS_STATIC_ALLOCATOR(allocator) ((allocator != nullptr) && (allocator->RefCount(nullptr) == STATIC_ALLOCATION))
727 #define IS_RUNTIME_ALLOCATOR(allocator) ((allocator != nullptr) && (allocator->RefCount(nullptr) == RUNTIME_REFCOUNT))
728 #define IS_OPENCL_ALLOCATOR(allocator) \
729   ((allocator != nullptr) && (allocator->RefCount(nullptr) == OPENCL_ALLOCATOR_REFCOUNT))
730+#define IS_NNRT_ALLOCATOR(allocator) ((allocator != nullptr) && (allocator->RefCount(nullptr) == NNRT_ALLOCATION))
731
732 struct LiteQuantParam {
733   double scale;
734--
7352.17.1
736
737