1From 3793994296c2ede3f79544d613acd8f6600ec9fb Mon Sep 17 00:00:00 2001 2From: chengfeng27 <chengfeng27@huawei.com> 3Date: Fri, 7 Jun 2024 15:31:09 +0800 4Subject: fix lite_graph dequant crash 5 6--- 7 .../delegate/nnrt/checker/primitive_check.cc | 115 ----------- 8 .../delegate/nnrt/checker/primitive_check.h | 1 - 9 .../litert/delegate/nnrt/nnrt_allocator.cc | 64 +++--- 10 .../src/litert/delegate/nnrt/nnrt_allocator.h | 20 +- 11 .../src/litert/delegate/nnrt/nnrt_delegate.cc | 32 +-- 12 .../src/litert/delegate/nnrt/nnrt_delegate.h | 1 + 13 .../litert/delegate/nnrt/nnrt_model_kernel.cc | 190 ++++++++++-------- 14 .../litert/delegate/nnrt/nnrt_model_kernel.h | 17 +- 15 mindspore/lite/src/tensor.h | 2 + 16 9 files changed, 186 insertions(+), 256 deletions(-) 17 18diff --git a/mindspore/lite/src/litert/delegate/nnrt/checker/primitive_check.cc b/mindspore/lite/src/litert/delegate/nnrt/checker/primitive_check.cc 19index 6b191c8e..67d60f1b 100644 20--- a/mindspore/lite/src/litert/delegate/nnrt/checker/primitive_check.cc 21+++ b/mindspore/lite/src/litert/delegate/nnrt/checker/primitive_check.cc 22@@ -7,121 +7,6 @@ 23 #include "src/common/utils.h" 24 namespace mindspore { 25 namespace lite { 26- 27-Status CheckPrimitiveSupported(const schema::Primitive *primitive) { 28- if (primitive != nullptr) { 29- auto prim = primitive; 30- auto type = prim->value_type(); 31- switch (type) { 32- case schema::PrimitiveType_Activation: 33- return mindspore::kSuccess; 34- case schema::PrimitiveType_AddFusion: 35- return mindspore::kSuccess; 36- case schema::PrimitiveType_ArgMaxFusion: 37- return mindspore::kSuccess; 38- case schema::PrimitiveType_AvgPoolFusion: 39- return mindspore::kSuccess; 40- case schema::PrimitiveType_BatchToSpaceND: 41- return mindspore::kSuccess; 42- case schema::PrimitiveType_BiasAdd: 43- return mindspore::kSuccess; 44- case schema::PrimitiveType_Cast: 45- return mindspore::kSuccess; 46- case schema::PrimitiveType_Concat: 47- return mindspore::kSuccess; 48- case schema::PrimitiveType_Conv2DFusion: 49- return mindspore::kSuccess; 50- case schema::PrimitiveType_Conv2dTransposeFusion: 51- return mindspore::kSuccess; 52- case schema::PrimitiveType_DivFusion: 53- return mindspore::kSuccess; 54- case schema::PrimitiveType_Eltwise: 55- return mindspore::kSuccess; 56- case schema::PrimitiveType_ExpandDims: 57- return mindspore::kSuccess; 58- case schema::PrimitiveType_Fill: 59- return mindspore::kSuccess; 60- case schema::PrimitiveType_FullConnection: 61- return mindspore::kSuccess; 62- case schema::PrimitiveType_FusedBatchNorm: 63- return mindspore::kSuccess; 64- case schema::PrimitiveType_Gather: 65- return mindspore::kSuccess; 66- case schema::PrimitiveType_LayerNormFusion: 67- return mindspore::kSuccess; 68- case schema::PrimitiveType_LessEqual: 69- return mindspore::kSuccess; 70- case schema::PrimitiveType_MatMulFusion: 71- return mindspore::kSuccess; 72- case schema::PrimitiveType_Maximum: 73- return mindspore::kSuccess; 74- case schema::PrimitiveType_MaxPoolFusion: 75- return mindspore::kSuccess; 76- case schema::PrimitiveType_MulFusion: 77- return mindspore::kSuccess; 78- case schema::PrimitiveType_OneHot: 79- return mindspore::kSuccess; 80- case schema::PrimitiveType_PadFusion: 81- return mindspore::kSuccess; 82- case schema::PrimitiveType_PowFusion: 83- return mindspore::kSuccess; 84- case schema::PrimitiveType_PReLUFusion: 85- return mindspore::kSuccess; 86- case schema::PrimitiveType_QuantDTypeCast: 87- return mindspore::kSuccess; 88- case schema::PrimitiveType_ReduceFusion: 89- return mindspore::kSuccess; 90- case schema::PrimitiveType_Reshape: 91- return mindspore::kSuccess; 92- case schema::PrimitiveType_Resize: 93- return mindspore::kSuccess; 94- case schema::PrimitiveType_Rsqrt: 95- return mindspore::kSuccess; 96- case schema::PrimitiveType_ScaleFusion: 97- return mindspore::kSuccess; 98- case schema::PrimitiveType_Shape: 99- return mindspore::kSuccess; 100- case schema::PrimitiveType_SliceFusion: 101- return mindspore::kSuccess; 102- case schema::PrimitiveType_Softmax: 103- return mindspore::kSuccess; 104- case schema::PrimitiveType_SpaceToBatchND: 105- return mindspore::kSuccess; 106- case schema::PrimitiveType_Split: 107- return mindspore::kSuccess; 108- case schema::PrimitiveType_Sqrt: 109- return mindspore::kSuccess; 110- case schema::PrimitiveType_SquaredDifference: 111- return mindspore::kSuccess; 112- case schema::PrimitiveType_Squeeze: 113- return mindspore::kSuccess; 114- case schema::PrimitiveType_Stack: 115- return mindspore::kSuccess; 116- case schema::PrimitiveType_StridedSlice: 117- return mindspore::kSuccess; 118- case schema::PrimitiveType_SubFusion: 119- return mindspore::kSuccess; 120- case schema::PrimitiveType_TileFusion: 121- return mindspore::kSuccess; 122- case schema::PrimitiveType_TopKFusion: 123- return mindspore::kSuccess; 124- case schema::PrimitiveType_Transpose: 125- return mindspore::kSuccess; 126- case schema::PrimitiveType_Unsqueeze: 127- return mindspore::kSuccess; 128- case schema::PrimitiveType_Custom: 129- return mindspore::kSuccess; 130- default: { 131- MS_LOG(WARNING) << "No primitive type :" << (int)(type); 132- return mindspore::kLiteSuccessExit; 133- } 134- } 135- return mindspore::kSuccess; 136- } else { 137- MS_LOG(ERROR) << "primitive is nullptr."; 138- return mindspore::kLiteError; 139- } 140-} 141 namespace { 142 bool NeedBitUppackCheck(const schema::Tensor &src_tensor) { 143 if (src_tensor.enableHuffmanCode()) { 144diff --git a/mindspore/lite/src/litert/delegate/nnrt/checker/primitive_check.h b/mindspore/lite/src/litert/delegate/nnrt/checker/primitive_check.h 145index dbdd812c..46b812c0 100644 146--- a/mindspore/lite/src/litert/delegate/nnrt/checker/primitive_check.h 147+++ b/mindspore/lite/src/litert/delegate/nnrt/checker/primitive_check.h 148@@ -4,7 +4,6 @@ 149 #include "include/api/status.h" 150 namespace mindspore { 151 namespace lite { 152-Status CheckPrimitiveSupported(const schema::Primitive *primitive); 153 Status CheckTensorSupported(const schema::Tensor *primitive); 154 } // namespace lite 155 } // namespace mindspore 156diff --git a/mindspore/lite/src/litert/delegate/nnrt/nnrt_allocator.cc b/mindspore/lite/src/litert/delegate/nnrt/nnrt_allocator.cc 157index f79c1682..b38fff62 100644 158--- a/mindspore/lite/src/litert/delegate/nnrt/nnrt_allocator.cc 159+++ b/mindspore/lite/src/litert/delegate/nnrt/nnrt_allocator.cc 160@@ -21,7 +21,6 @@ 161 #include <mutex> 162 #include "src/litert/delegate/nnrt/nnrt_allocator.h" 163 #include "src/common/log.h" 164-#include "interfaces/kits/c/neural_network_runtime/neural_network_runtime.h" 165 166 namespace mindspore { 167 namespace lite { 168@@ -29,23 +28,17 @@ NNRTAllocator::~NNRTAllocator() { 169 std::lock_guard<std::mutex> locker(mutex_); 170 for (auto &it : allocated_list_) { 171 auto membuf = it.second; 172- if (memory_category_ == NNRT_INPUT) { 173- OH_NNExecutor_DestroyInputMemory(executor_, index_, &(membuf->memory_)); 174- } else { 175- OH_NNExecutor_DestroyOutputMemory(executor_, index_, &(membuf->memory_)); 176- } 177- free(membuf); 178+ OH_NNTensor_Destroy(&membuf->tensor_); 179+ OH_NNTensorDesc_Destroy(&membuf->tensor_desc_); 180+ delete membuf; 181 } 182 allocated_list_.clear(); 183 184 for (auto &it : free_list_) { 185 auto membuf = it.second; 186- if (memory_category_ == NNRT_INPUT) { 187- OH_NNExecutor_DestroyInputMemory(executor_, index_, &(membuf->memory_)); 188- } else { 189- OH_NNExecutor_DestroyOutputMemory(executor_, index_, &(membuf->memory_)); 190- } 191- free(membuf); 192+ OH_NNTensor_Destroy(&membuf->tensor_); 193+ OH_NNTensorDesc_Destroy(&membuf->tensor_desc_); 194+ delete membuf; 195 } 196 free_list_.clear(); 197 } 198@@ -57,8 +50,8 @@ void *NNRTAllocator::Malloc(size_t size) { 199 auto membuf = iter->second; 200 membuf->ref_count_ = 0; 201 (void)free_list_.erase(iter); 202- allocated_list_[membuf->memory_->data] = membuf; 203- return membuf->memory_->data; 204+ allocated_list_[membuf->data] = membuf; 205+ return membuf->data; 206 } 207 208 auto membuf = new (std::nothrow) MemBuf(); 209@@ -66,30 +59,36 @@ void *NNRTAllocator::Malloc(size_t size) { 210 MS_LOG(ERROR) << "new Membuf failed."; 211 return nullptr; 212 } 213- 214 membuf->ref_count_ = 0; 215 if (memory_category_ == NNRT_INPUT) { 216- membuf->memory_ = OH_NNExecutor_AllocateInputMemory(executor_, index_, size); 217+ membuf->tensor_desc_ = OH_NNExecutor_CreateInputTensorDesc(executor_, index_); 218 } else { 219- membuf->memory_ = OH_NNExecutor_AllocateOutputMemory(executor_, index_, size); 220+ membuf->tensor_desc_ = OH_NNExecutor_CreateOutputTensorDesc(executor_, index_); 221 } 222- 223- if (membuf->memory_ == nullptr) { 224- MS_LOG(ERROR) << "malloc OH_NN_Memory return nullptr"; 225+ if (membuf->tensor_desc_ == nullptr) { 226+ MS_LOG(ERROR) << "OH_NNExecutor_CreateInput/OutputTensorDesc failed, i = " << index_; 227+ delete membuf; 228+ return nullptr; 229+ } 230+ membuf->tensor_ = OH_NNTensor_CreateWithSize(device_id_, membuf->tensor_desc_, size); 231+ if (membuf->tensor_ == nullptr) { 232+ MS_LOG(ERROR) << "OH_NNTensor_CreateWithSize failed, i = " << index_; 233+ OH_NNTensorDesc_Destroy(&membuf->tensor_desc_); 234+ delete membuf; 235 return nullptr; 236 } 237- if (membuf->memory_->data == nullptr) { 238- MS_LOG(ERROR) << "malloc OH_NN_Memory return nullptr"; 239- if (memory_category_ == NNRT_INPUT) { 240- OH_NNExecutor_DestroyInputMemory(executor_, index_, &(membuf->memory_)); 241- } else { 242- OH_NNExecutor_DestroyOutputMemory(executor_, index_, &(membuf->memory_)); 243- } 244+ membuf->data = OH_NNTensor_GetDataBuffer(membuf->tensor_); 245+ if (membuf->data == nullptr) { 246+ MS_LOG(ERROR) << "OH_NNTensor_GetDataBuffer failed, i = " << index_; 247+ OH_NNTensor_Destroy(&membuf->tensor_); 248+ OH_NNTensorDesc_Destroy(&membuf->tensor_desc_); 249+ delete membuf; 250 return nullptr; 251 } 252 253- allocated_list_[membuf->memory_->data] = membuf; 254- return membuf->memory_->data; 255+ membuf->size = size; 256+ allocated_list_[membuf->data] = membuf; 257+ return membuf->data; 258 } 259 260 void NNRTAllocator::Free(void *ptr) { 261@@ -105,12 +104,12 @@ void NNRTAllocator::Free(void *ptr) { 262 auto membuf = iter->second; 263 membuf->ref_count_ = 0; 264 (void)allocated_list_.erase(iter); 265- (void)free_list_.insert(std::make_pair(membuf->memory_->length, membuf)); 266+ (void)free_list_.insert(std::make_pair(membuf->size, membuf)); 267 } 268 269 int NNRTAllocator::RefCount(void *ptr) { 270 if (ptr == nullptr) { 271- return -1; 272+ return NNRT_ALLOCATION; 273 } 274 std::lock_guard<std::mutex> locker(mutex_); 275 auto iter = allocated_list_.find(ptr); 276@@ -163,6 +162,5 @@ int NNRTAllocator::IncRefCount(void *ptr, int ref_count) { 277 } 278 return -1; 279 } 280- 281 } // namespace lite 282 } // namespace mindspore 283\ No newline at end of file 284diff --git a/mindspore/lite/src/litert/delegate/nnrt/nnrt_allocator.h b/mindspore/lite/src/litert/delegate/nnrt/nnrt_allocator.h 285index f6721369..52e6def7 100644 286--- a/mindspore/lite/src/litert/delegate/nnrt/nnrt_allocator.h 287+++ b/mindspore/lite/src/litert/delegate/nnrt/nnrt_allocator.h 288@@ -23,6 +23,9 @@ 289 #include <map> 290 #include <mutex> 291 #include "include/api/allocator.h" 292+#include "src/tensor.h" 293+#include "interfaces/kits/c/neural_network_runtime/neural_network_runtime.h" 294+ 295 struct OH_NN_Memory; 296 struct OH_NNExecutor; 297 298@@ -32,8 +35,8 @@ enum MemoryCategory { NNRT_INPUT, NNRT_OUTPUT }; 299 300 class NNRTAllocator : public Allocator { 301 public: 302- NNRTAllocator(OH_NNExecutor *executor, int index, MemoryCategory memory_category) 303- : index_(index), memory_category_(memory_category), executor_(executor) {} 304+ NNRTAllocator(OH_NNExecutor *executor, int index, size_t device_id, MemoryCategory memory_category) 305+ : index_(index), device_id_(device_id), memory_category_(memory_category), executor_(executor) {} 306 ~NNRTAllocator() override; 307 308 void *Malloc(size_t size) override; 309@@ -42,14 +45,25 @@ class NNRTAllocator : public Allocator { 310 int SetRefCount(void *ptr, int ref_count) override; 311 int DecRefCount(void *ptr, int ref_count) override; 312 int IncRefCount(void *ptr, int ref_count) override; 313+ NN_Tensor *GetNNTensor(void *ptr) { 314+ auto iter = allocated_list_.find(ptr); 315+ if (iter != allocated_list_.end()) { 316+ return iter->second->tensor_; 317+ } 318+ return nullptr; 319+ } 320 321 private: 322 struct MemBuf { 323 std::atomic_int ref_count_{0}; 324- OH_NN_Memory *memory_{nullptr}; 325+ NN_TensorDesc *tensor_desc_{nullptr}; 326+ NN_Tensor *tensor_{nullptr}; 327+ void *data{nullptr}; 328+ size_t size{0}; 329 }; 330 331 int index_{0}; 332+ size_t device_id_{0}; 333 MemoryCategory memory_category_{NNRT_INPUT}; 334 OH_NNExecutor *executor_{nullptr}; 335 std::mutex mutex_; 336diff --git a/mindspore/lite/src/litert/delegate/nnrt/nnrt_delegate.cc b/mindspore/lite/src/litert/delegate/nnrt/nnrt_delegate.cc 337index d8450141..a949c910 100644 338--- a/mindspore/lite/src/litert/delegate/nnrt/nnrt_delegate.cc 339+++ b/mindspore/lite/src/litert/delegate/nnrt/nnrt_delegate.cc 340@@ -169,7 +169,7 @@ Status NNRTDelegate::CreateFullModelKernel(DelegateModel<schema::Primitive> *mod 341 } 342 OH_NNCompilation_Destroy(&nn_compilation); 343 344- auto nnrt_model_kernel = new (std::nothrow)NNRTModelKernel(nn_executor, model->inputs(), model->outputs()); 345+ auto nnrt_model_kernel = new (std::nothrow)NNRTModelKernel(nn_executor, nnrt_device_info_.device_id_, model->inputs(), model->outputs()); 346 if (nnrt_model_kernel == nullptr) { 347 OH_NNExecutor_Destroy(&nn_executor); 348 MS_LOG(ERROR) << "new NNRTModelKernel failed"; 349@@ -581,7 +581,7 @@ Status NNRTDelegate::CreateNNRTSubgraphKernels(DelegateModel<schema::Primitive> 350 continue ; 351 } 352 353- auto nnrt_model_kernel = new (std::nothrow)NNRTModelKernel(nn_executor, in_tensors, out_tensors); 354+ auto nnrt_model_kernel = new (std::nothrow)NNRTModelKernel(nn_executor, nnrt_device_info_.device_id_, in_tensors, out_tensors); 355 if (nnrt_model_kernel == nullptr) { 356 MS_LOG(ERROR) << "new NNRTModelKernel failed"; 357 return kLiteError; 358@@ -760,6 +760,15 @@ schema::Tensor *NNRTDelegate::TensorToSchemaTensor(Tensor *lite_tensor, schema:: 359 memcpy(tensor_buf, buf, fbb.GetSize()); 360 auto tensor = flatbuffers::GetRoot<schema::Tensor>(tensor_buf); 361 fbb.Clear(); 362+ if (tensor != nullptr) { 363+ // use to free tensor_buf 364+ auto iter = dequant_schema_tensors_buffer_map_.find(const_cast<schema::Tensor *>(tensor)); 365+ if (iter != dequant_schema_tensors_buffer_map_.end()) { 366+ MS_LOG(ERROR) << "schema tensor is duplicated."; 367+ return nullptr; 368+ } 369+ dequant_schema_tensors_buffer_map_[const_cast<schema::Tensor *>(tensor)] = tensor_buf; 370+ } 371 return const_cast<schema::Tensor *>(tensor); 372 } 373 374@@ -813,14 +822,6 @@ Status NNRTDelegate::DequantLiteGraph(LiteGraph *lite_graph) { 375 } 376 377 void NNRTDelegate::ShallowCopyLiteGraph(const lite::LiteGraph &lite_graph) { 378- Status ret; 379- for (auto node : lite_graph.all_nodes_) { 380- ret = lite::CheckPrimitiveSupported(static_cast<const schema::Primitive *>(node->primitive_)); 381- if (ret == kLiteError) { 382- MS_LOG(ERROR) << " primitive supported check failed."; 383- return; 384- } 385- } 386 std::vector<LiteGraph::Node *> node_list; 387 node_list.reserve(lite_graph.all_nodes_.size()); 388 // copy node 389@@ -856,7 +857,7 @@ void NNRTDelegate::ShallowCopyLiteGraph(const lite::LiteGraph &lite_graph) { 390 subgraph_list.emplace_back(new_subgraph); 391 } 392 for (auto tensor : lite_graph.all_tensors_) { 393- ret = lite::CheckTensorSupported(static_cast<const schema::Tensor *>(tensor)); 394+ Status ret = lite::CheckTensorSupported(static_cast<const schema::Tensor *>(tensor)); 395 if (ret == kLiteError) { 396 MS_LOG(ERROR) << "tensor supported check failed."; 397 return; 398@@ -921,10 +922,13 @@ NNRTDelegate::~NNRTDelegate() { 399 if (lite_graph_ != nullptr) { 400 MS_LOG(ERROR) << "Delete NNRTDelegate."; 401 } 402- for (auto iter : dequant_schema_tensors_) { 403- delete iter.second; 404- iter.second = nullptr; 405+ for (auto iter : dequant_schema_tensors_buffer_map_) { 406+ if (iter.second != nullptr) { 407+ free(iter.second); 408+ iter.second = nullptr; 409+ } 410 } 411+ dequant_schema_tensors_buffer_map_.clear(); 412 } 413 } // namespace lite 414 } // namespace mindspore 415diff --git a/mindspore/lite/src/litert/delegate/nnrt/nnrt_delegate.h b/mindspore/lite/src/litert/delegate/nnrt/nnrt_delegate.h 416index 778553ef..db2f0ee7 100644 417--- a/mindspore/lite/src/litert/delegate/nnrt/nnrt_delegate.h 418+++ b/mindspore/lite/src/litert/delegate/nnrt/nnrt_delegate.h 419@@ -95,6 +95,7 @@ class NNRTDelegate : public Delegate { 420 std::vector<OH_NNExecutor *> nn_executor_list_; 421 std::vector<Tensor *> *dequant_src_tensors_; 422 std::map<uint32_t, schema::Tensor *> dequant_schema_tensors_; 423+ std::map<schema::Tensor *, void *> dequant_schema_tensors_buffer_map_; 424 std::vector<schema::Tensor *> replaced_schema_tensors_; 425 }; 426 } // namespace lite 427diff --git a/mindspore/lite/src/litert/delegate/nnrt/nnrt_model_kernel.cc b/mindspore/lite/src/litert/delegate/nnrt/nnrt_model_kernel.cc 428index f83632dd..2a66d133 100644 429--- a/mindspore/lite/src/litert/delegate/nnrt/nnrt_model_kernel.cc 430+++ b/mindspore/lite/src/litert/delegate/nnrt/nnrt_model_kernel.cc 431@@ -19,7 +19,7 @@ 432 #include "litert/cxx_api/tensor/tensor_impl.h" 433 int mindspore::NNRTModelKernel::Prepare() { 434 for (size_t i = 0; i < inputs_.size(); i++) { 435- auto nnrt_allocator = std::make_shared<lite::NNRTAllocator>(oh_nn_executor, i, lite::NNRT_INPUT); 436+ auto nnrt_allocator = std::make_shared<lite::NNRTAllocator>(oh_nn_executor, i, device_id_, lite::NNRT_INPUT); 437 if (nnrt_allocator == nullptr) { 438 MS_LOG(ERROR) << "Create NNRTAllocator failed"; 439 return lite::RET_NULL_PTR; 440@@ -27,7 +27,7 @@ int mindspore::NNRTModelKernel::Prepare() { 441 inputs_[i].SetAllocator(nnrt_allocator); 442 } 443 for (size_t i = 0; i < outputs_.size(); i++) { 444- auto nnrt_allocator = std::make_shared<lite::NNRTAllocator>(oh_nn_executor, i, lite::NNRT_OUTPUT); 445+ auto nnrt_allocator = std::make_shared<lite::NNRTAllocator>(oh_nn_executor, i, device_id_, lite::NNRT_OUTPUT); 446 if (nnrt_allocator == nullptr) { 447 MS_LOG(ERROR) << "Create NNRTAllocator failed"; 448 return lite::RET_NULL_PTR; 449@@ -39,25 +39,33 @@ int mindspore::NNRTModelKernel::Prepare() { 450 451 int mindspore::NNRTModelKernel::Execute() { 452 MS_CHECK_TRUE_RET(this->outputs().empty() != true, lite::RET_ERROR); 453- zero_copy_ = this->outputs()[Index0].allocator() != nullptr; 454+ zero_copy_ = IS_NNRT_ALLOCATOR(this->outputs()[Index0].allocator()); 455 456+ if (!zero_copy_) { 457+ FreeNNTensor(); 458+ } 459+ nn_input_tensors_.clear(); 460+ nn_output_tensors_.clear(); 461+ nn_input_tensor_descs_.clear(); 462+ nn_output_tensor_descs_.clear(); 463 464- lite::STATUS ret_val = PrepareInputs(); 465+ lite::STATUS ret_val = SetInputs(); 466 if (ret_val != lite::RET_OK) { 467- MS_LOG(ERROR) << "NNRTModelKernel PrepareInputs failed, STATUS is " << ret_val; 468+ MS_LOG(ERROR) << "NNRTModelKernel SetInputs failed, STATUS is " << ret_val; 469 return ret_val; 470 } 471- ret_val = TransferOutputs(); 472+ ret_val = SetOutputs(); 473 if (ret_val != lite::RET_OK) { 474- MS_LOG(ERROR) << "NNRTModelKernel TransferOutputs failed, STATUS is " << ret_val; 475+ MS_LOG(ERROR) << "NNRTModelKernel SetOutputs failed, STATUS is " << ret_val; 476 return ret_val; 477 } 478 MS_LOG(INFO) << "Running NNRtModel Kernel..."; 479 OH_NN_ReturnCode ret_code; 480- ret_code = OH_NNExecutor_Run(this->oh_nn_executor); 481+ ret_code = OH_NNExecutor_RunSync(oh_nn_executor, nn_input_tensors_.data(), nn_input_tensors_.size(), 482+ nn_output_tensors_.data(), nn_output_tensors_.size()); 483 484 if (ret_code != OH_NN_SUCCESS) { 485- MS_LOG(ERROR) << "NNExecutor Run failed, OH_NN_ReturnCode = " << ret_code; 486+ MS_LOG(ERROR) << "OH_NNExecutor_RunSync Run failed, OH_NN_ReturnCode = " << ret_code; 487 return lite::RET_ERROR; 488 } 489 MS_LOG(INFO) << "Run NNRtModel Kernel success."; 490@@ -120,97 +128,107 @@ OH_NN_DataType mindspore::NNRTModelKernel::ConvertDataType(mindspore::DataType d 491 } 492 return oh_data_type; 493 } 494-int mindspore::NNRTModelKernel::PrepareInputs() { 495- auto input_tensors = this->inputs(); 496- for (size_t i = 0; i < input_tensors.size(); i++) { 497- auto tensor = input_tensors[i]; 498- auto tensor_shape = tensor.Shape(); 499- auto tmp_quant_param = tensor.QuantParams(); 500- OH_NN_QuantParam *quant_param = nullptr; 501- std::vector<uint32_t> bit_num; 502- std::vector<double> scale; 503- std::vector<int32_t> zero_point; 504- if (!tmp_quant_param.empty()) { 505- quant_param = (new (std::nothrow) OH_NN_QuantParam); 506- if (quant_param == nullptr) { 507- MS_LOG(ERROR) << "new OH_NN_QuantParam failed."; 508- return lite::RET_NULL_PTR; 509- } 510- for (auto qparam : tmp_quant_param) { 511- bit_num.emplace_back(qparam.bit_num); 512- scale.emplace_back(qparam.scale); 513- zero_point.emplace_back(qparam.zero_point); 514- } 515- quant_param->quantCount = tmp_quant_param.size(); 516- quant_param->numBits = bit_num.data(); 517- quant_param->scale = scale.data(); 518- quant_param->zeroPoint = zero_point.data(); 519+ 520+int mindspore::NNRTModelKernel::SetInputs() { 521+ if (!zero_copy_) { 522+ OH_NN_ReturnCode ret{OH_NN_FAILED}; 523+ size_t nn_input_count = 0; 524+ ret = OH_NNExecutor_GetInputCount(oh_nn_executor, &nn_input_count); 525+ if (ret != OH_NN_SUCCESS) { 526+ MS_LOG(ERROR) << "OH_NNExecutor_GetInputCount failed."; 527+ return lite::RET_ERROR; 528 } 529- auto oprend = new (std::nothrow) OH_NN_Tensor; 530- if (oprend == nullptr) { 531- MS_LOG(ERROR) << "new OH_NN_Tensor Failed"; 532+ if (nn_input_count != inputs_.size()) { 533+ MS_LOG(ERROR) << "input count is not equal between ms and nnrt."; 534 return lite::RET_ERROR; 535 } 536- oprend->dataType = ConvertDataType(tensor.DataType()); 537- oprend->dimensionCount = tensor_shape.size(); 538- 539- std::vector<int32_t> dimensions_list; 540- for (auto shape : tensor_shape) { 541- if (shape < INT32_MAX) { 542- dimensions_list.emplace_back(static_cast<int32_t>(shape)); 543- } else { 544- MS_LOG(ERROR) << "NNExecutor SetInput failed,tensor dimension is is too large, max dim = " << INT32_MAX 545- << ", but get dimension = " << shape; 546+ for (size_t i = 0; i < nn_input_count; i++) { 547+ NN_TensorDesc *tensor_desc_tmp = OH_NNExecutor_CreateInputTensorDesc(oh_nn_executor, i); 548+ if (tensor_desc_tmp == nullptr) { 549+ MS_LOG(ERROR) << "OH_NNExecutor_CreateInputTensorDesc failed, i = " << i; 550 return lite::RET_ERROR; 551 } 552+ nn_input_tensor_descs_.emplace_back(tensor_desc_tmp); 553+ NN_Tensor *tensor_tmp = OH_NNTensor_Create(device_id_, tensor_desc_tmp); 554+ if (tensor_tmp == nullptr) { 555+ MS_LOG(ERROR) << "OH_NNTensor_Create input failed, i = " << i; 556+ return lite::RET_ERROR; 557+ } 558+ nn_input_tensors_.emplace_back(tensor_tmp); 559+ void *nn_data = OH_NNTensor_GetDataBuffer(nn_input_tensors_[i]); 560+ size_t tensor_size; 561+ ret = OH_NNTensorDesc_GetByteSize(tensor_desc_tmp, &tensor_size); 562+ if (ret != OH_NN_SUCCESS || tensor_size != inputs_[i].DataSize()) { 563+ MS_LOG(ERROR) << "NN_Tensor size is not equal to MSTensor, i = " << i; 564+ return lite::RET_ERROR; 565+ } 566+ memcpy(nn_data, inputs_[i].MutableData(), inputs_[i].DataSize()); 567 } 568- oprend->dimensions = dimensions_list.data(); 569- oprend->quantParam = quant_param; 570- oprend->type = OH_NN_TENSOR; 571- MS_LOG_INFO << "input tensor: " << tensor.Name() << ", data: " << (void *)tensor.MutableData() 572- << ", size: " << tensor.DataSize(); 573- 574- OH_NN_ReturnCode ret_code; 575- if (zero_copy_) { 576- OH_NN_Memory mem{tensor.MutableData(), tensor.DataSize()}; 577- ret_code = OH_NNExecutor_SetInputWithMemory(oh_nn_executor, i, oprend, &mem); 578- } else { 579- ret_code = OH_NNExecutor_SetInput(oh_nn_executor, i, oprend, tensor.MutableData(), tensor.DataSize()); 580- } 581- 582- delete (oprend); 583- 584- if (!tmp_quant_param.empty()) { 585- free(quant_param); 586- quant_param = nullptr; 587- } 588- 589- if (ret_code != OH_NN_SUCCESS) { 590- MS_LOG(ERROR) << "NNExecutor SetInput failed, current input tensor is" << tensor.Name() 591- << "OH_NN_ReturnCode = " << ret_code; 592- return lite::RET_ERROR; 593+ } else { 594+ for (size_t i = 0; i < inputs_.size(); i++) { 595+ void *data = inputs_[i].MutableData(); 596+ NN_Tensor *tensor_tmp = reinterpret_cast<lite::NNRTAllocator *>(inputs_[i].allocator().get())->GetNNTensor(data); 597+ if (tensor_tmp == nullptr) { 598+ MS_LOG(ERROR) << "NNRTAllocator GetNNTensor failed, i = " << i; 599+ return lite::RET_ERROR; 600+ } 601+ nn_input_tensors_.emplace_back(tensor_tmp); 602 } 603 } 604- 605 return lite::RET_OK; 606 } 607-int mindspore::NNRTModelKernel::TransferOutputs() { 608- auto output_tensors = this->outputs(); 609- for (size_t i = 0; i < output_tensors.size(); i++) { 610- auto tensor = output_tensors[i]; 611 612- OH_NN_ReturnCode ret_code; 613- if (zero_copy_) { 614- OH_NN_Memory mem{tensor.MutableData(), tensor.DataSize()}; 615- ret_code = OH_NNExecutor_SetOutputWithMemory(oh_nn_executor, i, &mem); 616- } else { 617- ret_code = OH_NNExecutor_SetOutput(oh_nn_executor, i, tensor.MutableData(), tensor.DataSize()); 618+int mindspore::NNRTModelKernel::SetOutputs() { 619+ if (!zero_copy_) { 620+ OH_NN_ReturnCode ret{OH_NN_FAILED}; 621+ size_t nn_output_count = 0; 622+ ret = OH_NNExecutor_GetOutputCount(oh_nn_executor, &nn_output_count); 623+ if (ret != OH_NN_SUCCESS) { 624+ MS_LOG(ERROR) << "OH_NNExecutor_GetOutputCount failed."; 625+ return lite::RET_ERROR; 626 } 627- if (ret_code != OH_NN_SUCCESS) { 628- MS_LOG(ERROR) << "NNExecutor SetOutput failed, current out tensor is" << tensor.Name() 629- << ", OH_NN_ReturnCode = " << ret_code; 630+ if (nn_output_count != outputs_.size()) { 631+ MS_LOG(ERROR) << "output count is not equal between ms and nnrt."; 632 return lite::RET_ERROR; 633 } 634+ for (size_t i = 0; i < nn_output_count; i++) { 635+ NN_TensorDesc *tensor_desc_tmp = OH_NNExecutor_CreateOutputTensorDesc(oh_nn_executor, i); 636+ if (tensor_desc_tmp == nullptr) { 637+ MS_LOG(ERROR) << "OH_NNExecutor_CreateOutputTensorDesc failed, i = " << i; 638+ return lite::RET_ERROR; 639+ } 640+ nn_output_tensor_descs_.emplace_back(tensor_desc_tmp); 641+ NN_Tensor *tensor_tmp = OH_NNTensor_Create(device_id_, tensor_desc_tmp); 642+ if (tensor_tmp == nullptr) { 643+ MS_LOG(ERROR) << "OH_NNTensor_Create output failed, i = " << i; 644+ return lite::RET_ERROR; 645+ } 646+ nn_output_tensors_.emplace_back(tensor_tmp); 647+ auto data = OH_NNTensor_GetDataBuffer(nn_output_tensors_[i]); 648+ reinterpret_cast<LiteTensorImpl *>(outputs_[i].impl().get())->lite_tensor()->FreeData(); 649+ outputs_[i].SetData(data, false); 650+ } 651+ } else { 652+ for (size_t i = 0; i < outputs_.size(); i++) { 653+ void *data = outputs_[i].MutableData(); 654+ NN_Tensor *tensor_tmp = reinterpret_cast<lite::NNRTAllocator *>(outputs_[i].allocator().get())->GetNNTensor(data); 655+ if (tensor_tmp == nullptr) { 656+ MS_LOG(ERROR) << "NNRTAllocator GetNNTensor failed, i = " << i; 657+ return lite::RET_ERROR; 658+ } 659+ nn_output_tensors_.emplace_back(tensor_tmp); 660+ } 661 } 662 return lite::RET_OK; 663 } 664+ 665+void mindspore::NNRTModelKernel::FreeNNTensor() { 666+ for (size_t i = 0; i < nn_input_tensors_.size(); i++) { 667+ OH_NNTensor_Destroy(&nn_input_tensors_[i]); 668+ OH_NNTensorDesc_Destroy(&nn_input_tensor_descs_[i]); 669+ } 670+ for (size_t i = 0; i < nn_output_tensors_.size(); i++) { 671+ OH_NNTensor_Destroy(&nn_output_tensors_[i]); 672+ OH_NNTensorDesc_Destroy(&nn_output_tensor_descs_[i]); 673+ } 674+} 675diff --git a/mindspore/lite/src/litert/delegate/nnrt/nnrt_model_kernel.h b/mindspore/lite/src/litert/delegate/nnrt/nnrt_model_kernel.h 676index 33df925c..40800a2a 100644 677--- a/mindspore/lite/src/litert/delegate/nnrt/nnrt_model_kernel.h 678+++ b/mindspore/lite/src/litert/delegate/nnrt/nnrt_model_kernel.h 679@@ -31,9 +31,9 @@ class NNRTModelKernel : public kernel::Kernel { 680 * Because nnr can't run single op, but the whole model. So we decide to make the whole model into one kernel. 681 * */ 682 public: 683- NNRTModelKernel(OH_NNExecutor *oh_nn_executor, const std::vector<mindspore::MSTensor> &inputs, 684+ NNRTModelKernel(OH_NNExecutor *oh_nn_executor, size_t device_id, const std::vector<mindspore::MSTensor> &inputs, 685 const std::vector<mindspore::MSTensor> &outputs) 686- : kernel::Kernel(inputs, outputs, nullptr, nullptr), oh_nn_executor(oh_nn_executor) {} 687+ : kernel::Kernel(inputs, outputs, nullptr, nullptr), device_id_(device_id), oh_nn_executor(oh_nn_executor) {} 688 int Prepare() override; 689 int Execute() override; 690 int ReSize() override { 691@@ -41,14 +41,23 @@ class NNRTModelKernel : public kernel::Kernel { 692 return lite::RET_ERROR; 693 }; 694 OH_NN_DataType ConvertDataType(mindspore::DataType data_type); 695- int PrepareInputs(); 696- int TransferOutputs(); 697+ int SetInputs(); 698+ int SetOutputs(); 699+ void FreeNNTensor(); 700 ~NNRTModelKernel() override { 701+ if (!zero_copy_) { 702+ FreeNNTensor(); 703+ } 704 MS_LOG(INFO) << "NNRTModelKernel Destroy."; 705 } 706 707 protected: 708+ size_t device_id_; 709 OH_NNExecutor *oh_nn_executor = nullptr; 710+ std::vector<NN_Tensor *> nn_input_tensors_; 711+ std::vector<NN_TensorDesc *> nn_input_tensor_descs_; 712+ std::vector<NN_Tensor *> nn_output_tensors_; 713+ std::vector<NN_TensorDesc *> nn_output_tensor_descs_; 714 715 private: 716 bool zero_copy_{false}; 717diff --git a/mindspore/lite/src/tensor.h b/mindspore/lite/src/tensor.h 718index f2eb4d1a..501e28e5 100644 719--- a/mindspore/lite/src/tensor.h 720+++ b/mindspore/lite/src/tensor.h 721@@ -38,10 +38,12 @@ namespace lite { 722 #define STATIC_ALLOCATION -271964 723 #define RUNTIME_REFCOUNT 0x9999 724 #define OPENCL_ALLOCATOR_REFCOUNT -10000 725+#define NNRT_ALLOCATION -10001 726 #define IS_STATIC_ALLOCATOR(allocator) ((allocator != nullptr) && (allocator->RefCount(nullptr) == STATIC_ALLOCATION)) 727 #define IS_RUNTIME_ALLOCATOR(allocator) ((allocator != nullptr) && (allocator->RefCount(nullptr) == RUNTIME_REFCOUNT)) 728 #define IS_OPENCL_ALLOCATOR(allocator) \ 729 ((allocator != nullptr) && (allocator->RefCount(nullptr) == OPENCL_ALLOCATOR_REFCOUNT)) 730+#define IS_NNRT_ALLOCATOR(allocator) ((allocator != nullptr) && (allocator->RefCount(nullptr) == NNRT_ALLOCATION)) 731 732 struct LiteQuantParam { 733 double scale; 734-- 7352.17.1 736 737