From 464f222815cd1fed680d91569b29da5d6b4c281c Mon Sep 17 00:00:00 2001
From: qinzheng4 <qinzheng4@huawei.com>
Date: Mon, 19 Feb 2024 15:22:17 +0800
Subject: [PATCH] 0009-npu-zero-copy

---
 include/c_api/tensor_c.h                      |  15 ++
 mindspore/lite/BUILD.gn                       |   1 +
 mindspore/lite/src/litert/c_api/model_c.cc    |  40 ++++-
 mindspore/lite/src/litert/c_api/tensor_c.cc   |  32 ++++
 .../lite/src/litert/c_api/type_c_private.h    |   3 +
 .../src/litert/cxx_api/model/model_impl.cc    |  72 +++++++-
 .../litert/delegate/nnrt/nnrt_allocator.cc    | 168 ++++++++++++++++++
 .../src/litert/delegate/nnrt/nnrt_allocator.h |  64 +++++++
 .../litert/delegate/nnrt/nnrt_model_kernel.cc |  50 +++++-
 .../litert/delegate/nnrt/nnrt_model_kernel.h  |   3 +
 .../litert/kernel/cpu/nnacl/nnacl_kernel.cc   |   2 +-
 mindspore/lite/src/litert/mindrt_executor.cc  |  14 +-
 12 files changed, 453 insertions(+), 11 deletions(-)
 create mode 100644 mindspore/lite/src/litert/delegate/nnrt/nnrt_allocator.cc
 create mode 100644 mindspore/lite/src/litert/delegate/nnrt/nnrt_allocator.h

diff --git a/include/c_api/tensor_c.h b/include/c_api/tensor_c.h
index 6d2aaab6..2f641725 100644
--- a/include/c_api/tensor_c.h
+++ b/include/c_api/tensor_c.h
@@ -154,6 +154,21 @@ OH_AI_API int64_t OH_AI_TensorGetElementNum(const OH_AI_TensorHandle tensor);
 /// \return The data size of the tensor.
 OH_AI_API size_t OH_AI_TensorGetDataSize(const OH_AI_TensorHandle tensor);

+/// \brief Obtain the allocator of the tensor.
+///
+/// \param[in] tensor Tensor object handle.
+///
+/// \return The pointer to the allocator.
+OH_AI_API void *OH_AI_TensorGetAllocator(OH_AI_TensorHandle tensor);
+
+/// \brief Set the allocator of the tensor.
+///
+/// \param[in] tensor Tensor object handle.
+/// \param[in] allocator A pointer to the allocator.
+///
+/// \return OH_AI_STATUS_SUCCESS on success; a detailed error code on failure.
+OH_AI_API OH_AI_Status OH_AI_TensorSetAllocator(OH_AI_TensorHandle tensor, void *allocator);
+
 #ifdef __cplusplus
 }
 #endif
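
A minimal usage sketch of the two new APIs (illustrative only, not part of the patch): it assumes `model` was built with an NNRT device context, so its input tensors carry NNRT-backed allocators, and `user_tensor` is a hypothetical caller-created tensor.

  #include "include/c_api/model_c.h"
  #include "include/c_api/tensor_c.h"

  static bool ShareInputAllocator(OH_AI_ModelHandle model, OH_AI_TensorHandle user_tensor) {
    OH_AI_TensorHandleArray inputs = OH_AI_ModelGetInputs(model);
    // GetAllocator also registers the allocator in the framework's internal
    // table, which is what makes the pointer acceptable to SetAllocator below.
    void *alloc = OH_AI_TensorGetAllocator(inputs.handle_list[0]);
    // Attach the same device allocator to the caller's tensor so its buffer
    // is NNRT shared memory and can be consumed without a copy.
    return OH_AI_TensorSetAllocator(user_tensor, alloc) == OH_AI_STATUS_SUCCESS;
  }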
diff --git a/mindspore/lite/BUILD.gn b/mindspore/lite/BUILD.gn
index 4a83f498..723df1ec 100644
--- a/mindspore/lite/BUILD.gn
+++ b/mindspore/lite/BUILD.gn
@@ -443,6 +443,7 @@ ohos_shared_library("mindspore_lib") {
       "src/litert/delegate/nnrt/checker/primitive_check.cc",
       "src/litert/delegate/nnrt/nnrt_delegate.cc",
       "src/litert/delegate/nnrt/nnrt_model_kernel.cc",
+      "src/litert/delegate/nnrt/nnrt_allocator.cc",
     ]
     include_dirs += [
       "src/delegate/nnrt/include",
diff --git a/mindspore/lite/src/litert/c_api/model_c.cc b/mindspore/lite/src/litert/c_api/model_c.cc
index 9da52d76..20e1c227 100644
--- a/mindspore/lite/src/litert/c_api/model_c.cc
+++ b/mindspore/lite/src/litert/c_api/model_c.cc
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 #include "include/c_api/model_c.h"
+#include "type_c_private.h"
 #include <vector>
 #include <cstdint>
 #include "include/api/context.h"
@@ -37,6 +38,11 @@ public:
     for (auto out : outputs_train_) {
       delete out;
     }
+
+    // In the zero-copy scenario, the allocator getter/setter APIs insert entries into
+    // the global allocator table. That table outlives the model and would otherwise
+    // keep growing and leak memory, so clean it up when the ModelC is destroyed.
+    CleanAllocatorTable();
   }

   MSTensor **GetInputs(size_t *input_num);
@@ -246,10 +252,42 @@ OH_AI_Status OH_AI_ModelPredict(OH_AI_ModelHandle model, const OH_AI_TensorHandl
   mindspore::MSKernelCallBack after_call_back = impl->TransCallBack(after);

   std::vector<mindspore::MSTensor> ms_tensor_outputs;
+
+  bool all_has_data = false;
+
+  size_t output_num;
+  (void)impl->GetOutputs(&output_num);
+  auto handle_num = outputs->handle_num;
+  if (handle_num == output_num) {
+    MS_LOG(INFO) << "use user-provided outputs";
+    for (size_t i = 0; i < output_num; i++) {
+      if (outputs->handle_list[i] == nullptr) {
+        MS_LOG(ERROR) << "user-provided output handle_list[" << i << "] is nullptr";
+        return OH_AI_STATUS_LITE_NULLPTR;
+      }
+      ms_tensor_outputs.push_back(*static_cast<mindspore::MSTensor *>(outputs->handle_list[i]));
+    }
+
+    all_has_data = std::all_of(ms_tensor_outputs.begin(), ms_tensor_outputs.end(), [](const mindspore::MSTensor &t) {
+      return t.Data() != nullptr;
+    });
+
+    if (!all_has_data) {
+      ms_tensor_outputs.clear();
+    }
+
+  }
+
   auto ret = impl->model_->Predict(ms_tensor_inputs, &ms_tensor_outputs, before_call_back, after_call_back);
   if (!ret.IsOk()) {
     MS_LOG(ERROR) << "Predict fail, ret :" << ret;
+    return static_cast<OH_AI_Status>(ret.StatusCode());
   }
+
+  if (handle_num == output_num && all_has_data) {
+    return OH_AI_STATUS_SUCCESS;
+  }
+
   outputs->handle_list = reinterpret_cast<OH_AI_TensorHandle *>(impl->GetOutputs(&outputs->handle_num));
   return static_cast<OH_AI_Status>(ret.StatusCode());
 }
@@ -345,7 +383,7 @@ char **OH_AI_TrainCfgGetLossName(OH_AI_TrainCfgHandle train_cfg, size_t *num) {
   auto impl = static_cast<mindspore::TrainCfg *>(train_cfg);
   auto loss_name = impl->GetLossName();
   *num = loss_name.size();
-  char **name = static_cast<char **>(malloc(loss_name.size()));
+  char **name = static_cast<char **>(malloc(loss_name.size() * sizeof(char *)));
   if (name == nullptr) {
     MS_LOG(ERROR) << "Failed to malloc loss_name.";
     return nullptr;
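
A caller-side sketch of the fast path added to OH_AI_ModelPredict above (hedged; callbacks omitted): when the output array's handle count matches the model's and every output already owns a data buffer, results are written in place and the handle list is not repacked from GetOutputs().

  #include "include/c_api/model_c.h"
  #include "include/c_api/tensor_c.h"

  static OH_AI_Status PredictInPlace(OH_AI_ModelHandle model, OH_AI_TensorHandleArray inputs) {
    OH_AI_TensorHandleArray outputs = OH_AI_ModelGetOutputs(model);
    for (size_t i = 0; i < outputs.handle_num; ++i) {
      // Touch each output once: MutableData allocates on first use, so
      // Data() becomes non-null and the all_has_data branch above is taken.
      (void)OH_AI_TensorGetMutableData(outputs.handle_list[i]);
    }
    return OH_AI_ModelPredict(model, inputs, &outputs, NULL, NULL);
  }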
diff --git a/mindspore/lite/src/litert/c_api/tensor_c.cc b/mindspore/lite/src/litert/c_api/tensor_c.cc
index 4b1e6aff..fc3814dd 100644
--- a/mindspore/lite/src/litert/c_api/tensor_c.cc
+++ b/mindspore/lite/src/litert/c_api/tensor_c.cc
@@ -13,11 +13,18 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+#include <unordered_map>
 #include "include/c_api/tensor_c.h"
 #include "include/api/status.h"
 #include "src/tensor.h"
 #include "src/litert/cxx_api/tensor/tensor_impl.h"

+static std::unordered_map<void *, std::weak_ptr<mindspore::Allocator>> allocator_table;
+
+void CleanAllocatorTable() {
+  allocator_table.clear();
+}
+
 OH_AI_TensorHandle OH_AI_TensorCreate(const char *name, OH_AI_DataType type, const int64_t *shape, size_t shape_num,
                                       const void *data, size_t data_len) {
   if (name == nullptr || shape == nullptr) {
@@ -208,3 +215,28 @@ size_t OH_AI_TensorGetDataSize(const OH_AI_TensorHandle tensor) {
   auto impl = static_cast<mindspore::MSTensor *>(tensor);
   return impl->DataSize();
 }
+
+OH_AI_Status OH_AI_TensorSetAllocator(OH_AI_TensorHandle tensor, void *allocator) {
+  if (tensor == nullptr) {
+    MS_LOG(ERROR) << "param is nullptr.";
+    return OH_AI_STATUS_LITE_NULLPTR;
+  }
+  auto impl = static_cast<mindspore::MSTensor *>(tensor);
+  if (allocator_table.count(allocator) == 0) {
+    MS_LOG(ERROR) << "the input allocator does not belong to the framework";
+    return OH_AI_STATUS_LITE_PARAM_INVALID;
+  }
+  std::static_pointer_cast<mindspore::LiteTensorImpl>(impl->impl())->set_own_data(true);
+  impl->SetAllocator(allocator_table[allocator].lock());
+  return OH_AI_STATUS_SUCCESS;
+}
+
+void *OH_AI_TensorGetAllocator(const OH_AI_TensorHandle tensor) {
+  if (tensor == nullptr) {
+    MS_LOG(ERROR) << "param is nullptr.";
+    return nullptr;
+  }
+  auto impl = static_cast<mindspore::MSTensor *>(tensor);
+  allocator_table[impl->allocator().get()] = impl->allocator();
+  return impl->allocator().get();
+}
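
The allocator_table above is a non-owning registry keyed by the raw pointers handed across the C boundary: because it stores std::weak_ptr, the table never extends an allocator's lifetime, and a stale handle simply fails to lock(). A standalone sketch of the pattern (names hypothetical):

  #include <memory>
  #include <unordered_map>

  struct Allocator {};
  static std::unordered_map<void *, std::weak_ptr<Allocator>> table;

  // Get side: register the allocator and hand out an opaque raw pointer.
  void *ExportAllocator(const std::shared_ptr<Allocator> &a) {
    table[a.get()] = a;  // weak entry: does not keep the allocator alive
    return a.get();
  }

  // Set side: accept only previously exported pointers, re-owning via lock().
  std::shared_ptr<Allocator> ResolveAllocator(void *handle) {
    auto it = table.find(handle);
    if (it == table.end()) {
      return nullptr;  // pointer never came from ExportAllocator
    }
    return it->second.lock();  // empty if the allocator has already died
  }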
diff --git a/mindspore/lite/src/litert/c_api/type_c_private.h b/mindspore/lite/src/litert/c_api/type_c_private.h
index 2d3b3883..1a76820d 100644
--- a/mindspore/lite/src/litert/c_api/type_c_private.h
+++ b/mindspore/lite/src/litert/c_api/type_c_private.h
@@ -36,5 +36,8 @@ struct NNRTDeviceDesc {

 #ifdef __cplusplus
 }
+
+void CleanAllocatorTable();
+
 #endif
 #endif  // MINDSPORE_LITE_SRC_LITERT_C_API_TYPE_C_PRIVATE_H_
diff --git a/mindspore/lite/src/litert/cxx_api/model/model_impl.cc b/mindspore/lite/src/litert/cxx_api/model/model_impl.cc
index 78b1ca67..5d1b78a2 100644
--- a/mindspore/lite/src/litert/cxx_api/model/model_impl.cc
+++ b/mindspore/lite/src/litert/cxx_api/model/model_impl.cc
@@ -463,7 +463,55 @@ Status ModelImpl::Predict(const std::vector<MSTensor> &inputs, std::vector<MSTen
           input->set_shape(truncate_shape);
 #endif
         }
-        input->set_data(user_input.MutableData());
+        if (user_input.allocator() == input->allocator()) {
+          input->set_data(user_input.MutableData());
+          input->set_own_data(false);
+        } else {
+          void *user_data = user_input.MutableData();
+          if (user_data == nullptr) {
+            MS_LOG(ERROR) << "user data is nullptr";
+            return kLiteNullptr;
+          }
+          void *input_data = input->MutableData();
+          if (input_data == nullptr) {
+            MS_LOG(ERROR) << "input data is nullptr";
+            return kLiteNullptr;
+          }
+          memcpy(input_data, user_data, input->Size());
+        }
+      }
+    }
+  }
+
+  auto ori_output_tensors = GetOutputs();
+  std::vector<bool> copy_output_data;
+  copy_output_data.resize(ori_output_tensors.size(), false);
+  if (outputs->empty()) {
+    MS_LOG(INFO) << "user-provided outputs are empty";
+  } else if (outputs->size() != ori_output_tensors.size()) {
+    MS_LOG(ERROR) << "user-provided output size is not equal to the model's output size";
+    return kLiteError;
+  } else {
+    for (size_t i = 0; i < ori_output_tensors.size(); i++) {
+      auto ori_output = ori_output_tensors[i];
+      auto lite_impl = std::static_pointer_cast<LiteTensorImpl>(ori_output.impl());
+      MS_CHECK_TRUE_RET(lite_impl != nullptr, kLiteNullptr);
+      auto ori_out_tensor = static_cast<lite::Tensor *>(lite_impl->lite_tensor());
+      MS_CHECK_TRUE_RET(ori_out_tensor != nullptr, kLiteNullptr);
+
+      auto user_output = (*outputs)[i];
+      if (ori_output.impl() == user_output.impl()) {
+        continue;
+      }
+
+      auto user_out_data = user_output.MutableData();
+      MS_CHECK_TRUE_RET(user_out_data != nullptr, kLiteNullptr);
+      if (ori_out_tensor->allocator() == user_output.allocator()) {
+        MS_LOG(INFO) << "use user data";
+        ori_out_tensor->set_data(user_out_data);
+        ori_out_tensor->set_own_data(false);
+      } else {
+        copy_output_data[i] = true;
       }
     }
   }
@@ -474,6 +522,28 @@ Status ModelImpl::Predict(const std::vector<MSTensor> &inputs, std::vector<MSTen
     return ret;
   }
   MS_LOG(DEBUG) << "Run graph success.";
+
+  for (size_t i = 0; i < copy_output_data.size(); i++) {
+    if (!copy_output_data[i]) {
+      continue;
+    }
+    auto ori_output = ori_output_tensors[i];
+    auto ori_out_data = ori_output.MutableData();
+    MS_CHECK_TRUE_RET(ori_out_data != nullptr, kLiteNullptr);
+    auto user_output = (*outputs)[i];
+    MS_CHECK_TRUE_RET(user_output.MutableData() != nullptr, kLiteNullptr);
+    if (user_output.DataSize() >= ori_output.DataSize()) {
+      memcpy(user_output.MutableData(), ori_out_data, ori_output.DataSize());
+    } else {
+      MS_LOG(ERROR) << "user output data size is less than the model's output data size";
+      return kLiteError;
+    }
+  }
+
+  if (outputs->size() == ori_output_tensors.size()) {
+    return kSuccess;
+  }
+
   auto res = GetOutputs();
   if (res.empty()) {
     MS_LOG(DEBUG) << "Empty outputs.";
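
The binding rule these Predict changes implement, reduced to a compilable sketch (the types are stand-ins, not the real lite::Tensor API): an output whose allocator matches the graph tensor's is adopted by pointer (zero copy, with ownership cleared so the runtime never frees the caller's buffer); any other output is copied back after the run, with a size check.

  #include <cstddef>
  #include <cstring>
  #include <memory>

  struct Alloc {};
  struct Buf {  // stand-in for a tensor's data fields
    std::shared_ptr<Alloc> allocator;
    void *data = nullptr;
    size_t size = 0;
    bool own_data = true;
  };

  // Pre-run: returns true if this output still needs a post-run copy.
  bool BindOutput(Buf &graph, Buf &user) {
    if (graph.allocator == user.allocator) {
      graph.data = user.data;  // kernels write straight into the user buffer
      graph.own_data = false;  // the graph side must not free it
      return false;
    }
    return true;  // allocator mismatch: fall back to memcpy
  }

  // Post-run fallback, mirroring the size check above.
  bool CopyBack(const Buf &graph, Buf &user) {
    if (user.size < graph.size) {
      return false;  // user buffer too small
    }
    std::memcpy(user.data, graph.data, graph.size);
    return true;
  }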
diff --git a/mindspore/lite/src/litert/delegate/nnrt/nnrt_allocator.cc b/mindspore/lite/src/litert/delegate/nnrt/nnrt_allocator.cc
new file mode 100644
index 00000000..f79c1682
--- /dev/null
+++ b/mindspore/lite/src/litert/delegate/nnrt/nnrt_allocator.cc
@@ -0,0 +1,168 @@
+/**
+ * Copyright 2023 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <memory>
+#include <atomic>
+#include <unordered_map>
+#include <map>
+#include <mutex>
+#include "src/litert/delegate/nnrt/nnrt_allocator.h"
+#include "src/common/log.h"
+#include "interfaces/kits/c/neural_network_runtime/neural_network_runtime.h"
+
+namespace mindspore {
+namespace lite {
+NNRTAllocator::~NNRTAllocator() {
+  std::lock_guard<std::mutex> locker(mutex_);
+  for (auto &it : allocated_list_) {
+    auto membuf = it.second;
+    if (memory_category_ == NNRT_INPUT) {
+      OH_NNExecutor_DestroyInputMemory(executor_, index_, &(membuf->memory_));
+    } else {
+      OH_NNExecutor_DestroyOutputMemory(executor_, index_, &(membuf->memory_));
+    }
+    delete membuf;
+  }
+  allocated_list_.clear();
+
+  for (auto &it : free_list_) {
+    auto membuf = it.second;
+    if (memory_category_ == NNRT_INPUT) {
+      OH_NNExecutor_DestroyInputMemory(executor_, index_, &(membuf->memory_));
+    } else {
+      OH_NNExecutor_DestroyOutputMemory(executor_, index_, &(membuf->memory_));
+    }
+    delete membuf;
+  }
+  free_list_.clear();
+}
+
+void *NNRTAllocator::Malloc(size_t size) {
+  std::lock_guard<std::mutex> locker(mutex_);
+  auto iter = free_list_.lower_bound(size);
+  if (iter != free_list_.end()) {
+    auto membuf = iter->second;
+    membuf->ref_count_ = 0;
+    (void)free_list_.erase(iter);
+    allocated_list_[membuf->memory_->data] = membuf;
+    return membuf->memory_->data;
+  }
+
+  auto membuf = new (std::nothrow) MemBuf();
+  if (membuf == nullptr) {
+    MS_LOG(ERROR) << "new MemBuf failed.";
+    return nullptr;
+  }
+
+  membuf->ref_count_ = 0;
+  if (memory_category_ == NNRT_INPUT) {
+    membuf->memory_ = OH_NNExecutor_AllocateInputMemory(executor_, index_, size);
+  } else {
+    membuf->memory_ = OH_NNExecutor_AllocateOutputMemory(executor_, index_, size);
+  }
+
+  if (membuf->memory_ == nullptr) {
+    MS_LOG(ERROR) << "allocate OH_NN_Memory returned nullptr";
+    return nullptr;
+  }
+  if (membuf->memory_->data == nullptr) {
+    MS_LOG(ERROR) << "allocated OH_NN_Memory has nullptr data";
+    if (memory_category_ == NNRT_INPUT) {
+      OH_NNExecutor_DestroyInputMemory(executor_, index_, &(membuf->memory_));
+    } else {
+      OH_NNExecutor_DestroyOutputMemory(executor_, index_, &(membuf->memory_));
+    }
+    return nullptr;
+  }
+
+  allocated_list_[membuf->memory_->data] = membuf;
+  return membuf->memory_->data;
+}
+
+void NNRTAllocator::Free(void *ptr) {
+  if (ptr == nullptr) {
+    return;
+  }
+
+  std::lock_guard<std::mutex> locker(mutex_);
+  auto iter = allocated_list_.find(ptr);
+  if (iter == allocated_list_.end()) {
+    return;
+  }
+  auto membuf = iter->second;
+  membuf->ref_count_ = 0;
+  (void)allocated_list_.erase(iter);
+  (void)free_list_.insert(std::make_pair(membuf->memory_->length, membuf));
+}
+
+int NNRTAllocator::RefCount(void *ptr) {
+  if (ptr == nullptr) {
+    return -1;
+  }
+  std::lock_guard<std::mutex> locker(mutex_);
+  auto iter = allocated_list_.find(ptr);
+  if (iter != allocated_list_.end()) {
+    auto membuf = iter->second;
+    int ref_count = std::atomic_load(&membuf->ref_count_);
+    return ref_count;
+  }
+  return -1;
+}
+
+int NNRTAllocator::SetRefCount(void *ptr, int ref_count) {
+  if (ptr == nullptr) {
+    return -1;
+  }
+  std::lock_guard<std::mutex> locker(mutex_);
+  auto iter = allocated_list_.find(ptr);
+  if (iter != allocated_list_.end()) {
+    auto membuf = iter->second;
+    std::atomic_store(&membuf->ref_count_, ref_count);
+    return ref_count;
+  }
+  return -1;
+}
+
+int NNRTAllocator::DecRefCount(void *ptr, int ref_count) {
+  if (ptr == nullptr) {
+    return -1;
+  }
+  std::lock_guard<std::mutex> locker(mutex_);
+  auto iter = allocated_list_.find(ptr);
+  if (iter != allocated_list_.end()) {
+    auto membuf = iter->second;
+    auto ref = std::atomic_fetch_sub(&membuf->ref_count_, ref_count);
+    return ref;
+  }
+  return -1;
+}
+
+int NNRTAllocator::IncRefCount(void *ptr, int ref_count) {
+  if (ptr == nullptr) {
+    return -1;
+  }
+  std::lock_guard<std::mutex> locker(mutex_);
+  auto iter = allocated_list_.find(ptr);
+  if (iter != allocated_list_.end()) {
+    auto membuf = iter->second;
+    auto ref = std::atomic_fetch_add(&membuf->ref_count_, ref_count);
+    return ref;
+  }
+  return -1;
+}
+
+}  // namespace lite
+}  // namespace mindspore
\ No newline at end of file
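
The core of Malloc above is best-fit recycling: free_list_ is a multimap ordered by block size, so lower_bound(size) finds the smallest cached OH_NN_Memory block of at least `size` bytes before any new device allocation is requested. The lookup in isolation, with plain pointers standing in for MemBuf:

  #include <cstddef>
  #include <map>

  static std::multimap<size_t, void *> free_list;  // block size -> cached block

  // Returns a cached block of at least `size` bytes, or nullptr if the caller
  // should allocate fresh device memory (OH_NNExecutor_Allocate*Memory).
  void *ReuseBlock(size_t size) {
    auto it = free_list.lower_bound(size);  // smallest block >= size
    if (it == free_list.end()) {
      return nullptr;
    }
    void *block = it->second;
    free_list.erase(it);  // move it back to the allocated side
    return block;
  }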
diff --git a/mindspore/lite/src/litert/delegate/nnrt/nnrt_allocator.h b/mindspore/lite/src/litert/delegate/nnrt/nnrt_allocator.h
new file mode 100644
index 00000000..f6721369
--- /dev/null
+++ b/mindspore/lite/src/litert/delegate/nnrt/nnrt_allocator.h
@@ -0,0 +1,64 @@
+/**
+ * Copyright 2023 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_NNRT_NNRT_ALLOCATOR_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_NNRT_NNRT_ALLOCATOR_H_
+
+#include <vector>
+#include <map>
+#include <atomic>
+#include <unordered_map>
+#include <map>
+#include <mutex>
+#include "include/api/allocator.h"
+struct OH_NN_Memory;
+struct OH_NNExecutor;
+
+namespace mindspore {
+namespace lite {
+enum MemoryCategory { NNRT_INPUT, NNRT_OUTPUT };
+
+class NNRTAllocator : public Allocator {
+ public:
+  NNRTAllocator(OH_NNExecutor *executor, int index, MemoryCategory memory_category)
+      : index_(index), memory_category_(memory_category), executor_(executor) {}
+  ~NNRTAllocator() override;
+
+  void *Malloc(size_t size) override;
+  void Free(void *ptr) override;
+  int RefCount(void *ptr) override;
+  int SetRefCount(void *ptr, int ref_count) override;
+  int DecRefCount(void *ptr, int ref_count) override;
+  int IncRefCount(void *ptr, int ref_count) override;
+
+ private:
+  struct MemBuf {
+    std::atomic_int ref_count_{0};
+    OH_NN_Memory *memory_{nullptr};
+  };
+
+  int index_{0};
+  MemoryCategory memory_category_{NNRT_INPUT};
+  OH_NNExecutor *executor_{nullptr};
+  std::mutex mutex_;
+  // <membuf->memory_->data, membuf>
+  std::unordered_map<void *, MemBuf *> allocated_list_;
+  std::multimap<size_t, MemBuf *> free_list_;
+};
+
+}  // namespace lite
+}  // namespace mindspore
+
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_NNRT_NNRT_ALLOCATOR_H_
\ No newline at end of file
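
The ref-count overrides declared above are how the runtime tracks users of a shared device buffer: a block may be recycled only when the value returned by the final DecRefCount shows it was the last reference. The protocol in miniature:

  #include <atomic>

  int main() {
    std::atomic_int ref_count{0};
    std::atomic_fetch_add(&ref_count, 1);             // IncRefCount: a tensor takes the buffer
    int prev = std::atomic_fetch_sub(&ref_count, 1);  // DecRefCount: returns the old value
    if (prev == 1) {
      // last user just released it: Free(ptr) moves the block onto free_list_
    }
    return 0;
  }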
diff --git a/mindspore/lite/src/litert/delegate/nnrt/nnrt_model_kernel.cc b/mindspore/lite/src/litert/delegate/nnrt/nnrt_model_kernel.cc
index 67443e08..f83632dd 100644
--- a/mindspore/lite/src/litert/delegate/nnrt/nnrt_model_kernel.cc
+++ b/mindspore/lite/src/litert/delegate/nnrt/nnrt_model_kernel.cc
@@ -15,8 +15,33 @@
  */
 #include <include/errorcode.h>
 #include "nnrt_model_kernel.h"
-int mindspore::NNRTModelKernel::Prepare() { return 0; }
+#include "nnrt_allocator.h"
+#include "litert/cxx_api/tensor/tensor_impl.h"
+int mindspore::NNRTModelKernel::Prepare() {
+  for (size_t i = 0; i < inputs_.size(); i++) {
+    auto nnrt_allocator = std::make_shared<lite::NNRTAllocator>(oh_nn_executor, i, lite::NNRT_INPUT);
+    if (nnrt_allocator == nullptr) {
+      MS_LOG(ERROR) << "Create NNRTAllocator failed";
+      return lite::RET_NULL_PTR;
+    }
+    inputs_[i].SetAllocator(nnrt_allocator);
+  }
+  for (size_t i = 0; i < outputs_.size(); i++) {
+    auto nnrt_allocator = std::make_shared<lite::NNRTAllocator>(oh_nn_executor, i, lite::NNRT_OUTPUT);
+    if (nnrt_allocator == nullptr) {
+      MS_LOG(ERROR) << "Create NNRTAllocator failed";
+      return lite::RET_NULL_PTR;
+    }
+    outputs_[i].SetAllocator(nnrt_allocator);
+  }
+  return lite::RET_OK;
+}
+
 int mindspore::NNRTModelKernel::Execute() {
+  MS_CHECK_TRUE_RET(!this->outputs().empty(), lite::RET_ERROR);
+  zero_copy_ = this->outputs()[Index0].allocator() != nullptr;
+
+
   lite::STATUS ret_val = PrepareInputs();
   if (ret_val != lite::RET_OK) {
     MS_LOG(ERROR) << "NNRTModelKernel PrepareInputs failed, STATUS is " << ret_val;
@@ -142,9 +167,17 @@ int mindspore::NNRTModelKernel::PrepareInputs() {
     oprend->dimensions = dimensions_list.data();
     oprend->quantParam = quant_param;
     oprend->type = OH_NN_TENSOR;
-    MS_LOG_INFO << "input tensor: " << tensor.Name() << ", data: " << (void *)tensor.MutableData() << ", size: " << tensor.DataSize();
-    OH_NN_ReturnCode ret_code =
-      OH_NNExecutor_SetInput(oh_nn_executor, i, oprend, tensor.MutableData(), tensor.DataSize());
+    MS_LOG_INFO << "input tensor: " << tensor.Name() << ", data: " << (void *)tensor.MutableData()
+                << ", size: " << tensor.DataSize();
+
+    OH_NN_ReturnCode ret_code;
+    if (zero_copy_) {
+      OH_NN_Memory mem{tensor.MutableData(), tensor.DataSize()};
+      ret_code = OH_NNExecutor_SetInputWithMemory(oh_nn_executor, i, oprend, &mem);
+    } else {
+      ret_code = OH_NNExecutor_SetInput(oh_nn_executor, i, oprend, tensor.MutableData(), tensor.DataSize());
+    }
+
     delete (oprend);

     if (!tmp_quant_param.empty()) {
@@ -165,7 +198,14 @@ int mindspore::NNRTModelKernel::TransferOutputs() {
   auto output_tensors = this->outputs();
   for (size_t i = 0; i < output_tensors.size(); i++) {
     auto tensor = output_tensors[i];
-    OH_NN_ReturnCode ret_code = OH_NNExecutor_SetOutput(oh_nn_executor, i, tensor.MutableData(), tensor.DataSize());
+
+    OH_NN_ReturnCode ret_code;
+    if (zero_copy_) {
+      OH_NN_Memory mem{tensor.MutableData(), tensor.DataSize()};
+      ret_code = OH_NNExecutor_SetOutputWithMemory(oh_nn_executor, i, &mem);
+    } else {
+      ret_code = OH_NNExecutor_SetOutput(oh_nn_executor, i, tensor.MutableData(), tensor.DataSize());
+    }
    if (ret_code != OH_NN_SUCCESS) {
       MS_LOG(ERROR) << "NNExecutor SetOutput failed, current out tensor is" << tensor.Name()
                     << ", OH_NN_ReturnCode = " << ret_code;
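
For context, a sketch of the published NNRT shared-memory flow that the zero_copy_ branches above plug into (the OH_NN* signatures are from neural_network_runtime.h; `operand` is assumed to describe the input as in PrepareInputs()):

  #include "interfaces/kits/c/neural_network_runtime/neural_network_runtime.h"

  static OH_NN_ReturnCode BindInputZeroCopy(OH_NNExecutor *executor, const OH_NN_Tensor *operand, size_t size) {
    // Allocate device-visible shared memory for input slot 0.
    OH_NN_Memory *mem = OH_NNExecutor_AllocateInputMemory(executor, 0, size);
    if (mem == NULL) {
      return OH_NN_MEMORY_ERROR;
    }
    // ... fill mem->data with the input payload ...
    // Bind the memory to the input; the executor reads it without a host copy.
    OH_NN_ReturnCode ret = OH_NNExecutor_SetInputWithMemory(executor, 0, operand, mem);
    // The memory stays bound across Run(); release it once the executor is done:
    // OH_NNExecutor_DestroyInputMemory(executor, 0, &mem);
    return ret;
  }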
diff --git a/mindspore/lite/src/litert/delegate/nnrt/nnrt_model_kernel.h b/mindspore/lite/src/litert/delegate/nnrt/nnrt_model_kernel.h
index ea15f7ca..4f2d4f19 100644
--- a/mindspore/lite/src/litert/delegate/nnrt/nnrt_model_kernel.h
+++ b/mindspore/lite/src/litert/delegate/nnrt/nnrt_model_kernel.h
@@ -51,6 +51,9 @@ class NNRTModelKernel : public kernel::Kernel {

  protected:
   OH_NNExecutor *oh_nn_executor = nullptr;
+
+ private:
+  bool zero_copy_{false};
 };
 }  // namespace mindspore

diff --git a/mindspore/lite/src/litert/kernel/cpu/nnacl/nnacl_kernel.cc b/mindspore/lite/src/litert/kernel/cpu/nnacl/nnacl_kernel.cc
index 813a6467..6cedc8c9 100644
--- a/mindspore/lite/src/litert/kernel/cpu/nnacl/nnacl_kernel.cc
+++ b/mindspore/lite/src/litert/kernel/cpu/nnacl/nnacl_kernel.cc
@@ -105,7 +105,7 @@ int NNACLKernel::OptimizeDataCopy() {

   if (input_tensor->allocator() == nullptr || input_tensor->allocator() != output_tensor->allocator() ||
       input_tensor->allocator() != ms_context_->allocator || /* runtime allocator */
-      op_parameter_->is_train_session_) {
+      op_parameter_->is_train_session_ || !output_tensor->own_data()) {
     return NNACLKernel::Run();
   }

diff --git a/mindspore/lite/src/litert/mindrt_executor.cc b/mindspore/lite/src/litert/mindrt_executor.cc
index e5cd720c..5c08cedf 100644
--- a/mindspore/lite/src/litert/mindrt_executor.cc
+++ b/mindspore/lite/src/litert/mindrt_executor.cc
@@ -295,14 +295,22 @@ void MindrtExecutor::FreeOutputTensor() {
     if (dst_tensor->data_type() == kNumberTypeGLUInt && src_tensor->data_type() == kNumberTypeGLUInt) {
       continue;
     }
-    if (dst_tensor->allocator() != nullptr) {
+
+    if ((dst_tensor->allocator() != nullptr && dst_tensor->own_data()) || dst_tensor->data() == nullptr) {
+      MS_LOG(DEBUG) << "free data";
       dst_tensor->FreeData();
-    } else {
-      if (dst_tensor->data_type() == src_tensor->data_type()) {
+    } else if (dst_tensor->data() != nullptr && dst_tensor->data_type() == src_tensor->data_type()) {
+      if (dst_tensor->allocator() == nullptr) {
         /* user set graph-output-tensor from outside */
+        MS_LOG(DEBUG) << "user set graph-output-tensor from outside";
         src_tensor->set_data(dst_tensor->data());
         src_tensor->set_own_data(false);
         src_tensor->set_allocator(nullptr);
+      } else if (dst_tensor->allocator() == src_tensor->allocator()) {
+        /* nnrt npu zero copy scene */
+        MS_LOG(DEBUG) << "zero copy data";
+        src_tensor->set_data(dst_tensor->data());
+        src_tensor->set_own_data(dst_tensor->own_data());
       }
     }
   }
--
2.25.1
