From fcce2a2794417a6ff16148dbb751e402e476084a Mon Sep 17 00:00:00 2001
From: chengfeng27
Date: Tue, 23 Jul 2024 10:46:59 +0800
Subject: [PATCH] fix memory leak

---
 .../core/mindrt/src/thread/core_affinity.cc   |   2 +-
 mindspore/lite/BUILD.gn                       |   5 +-
 mindspore/lite/src/common/mmap_utils.cc       |  14 +-
 mindspore/lite/src/common/mmap_utils.h        |   2 +-
 mindspore/lite/src/litert/cache_session.cc    | 425 ++++++++++++++++++
 mindspore/lite/src/litert/cache_session.h     | 129 ++++++
 .../src/litert/cxx_api/model/model_impl.cc    |  36 +-
 .../delegate/nnrt/extension_options_parser.cc |  12 +
 .../delegate/nnrt/extension_options_parser.h  |   2 +
 mindspore/lite/src/litert/lite_model.cc       |  12 +-
 mindspore/lite/src/litert/lite_model.h        |   2 +-
 mindspore/lite/src/litert/lite_session.h      |   6 +-
 12 files changed, 631 insertions(+), 16 deletions(-)
 create mode 100644 mindspore/lite/src/litert/cache_session.cc
 create mode 100644 mindspore/lite/src/litert/cache_session.h

diff --git a/mindspore/core/mindrt/src/thread/core_affinity.cc b/mindspore/core/mindrt/src/thread/core_affinity.cc
index 6886f743..6d13724f 100644
--- a/mindspore/core/mindrt/src/thread/core_affinity.cc
+++ b/mindspore/core/mindrt/src/thread/core_affinity.cc
@@ -217,7 +217,7 @@ int GetMaxFrequency(int core_id) {
 
 float CoreAffinity::GetServerFrequency() {
   float max_freq = -1.0f;
-#if defined(__APPLE__) || defined(__MACOSX) || defined(_MSC_VER) || defined(_WIN32)
+#if defined(__APPLE__) || defined(__MACOSX) || defined(_MSC_VER) || defined(_WIN32) || defined(MS_COMPILE_OHOS)
   return max_freq;  // MHz
 #else
   // The CPU cores in the server of the numa architecture are the same.
diff --git a/mindspore/lite/BUILD.gn b/mindspore/lite/BUILD.gn
index acee9733..d8ed3b44 100644
--- a/mindspore/lite/BUILD.gn
+++ b/mindspore/lite/BUILD.gn
@@ -438,7 +438,10 @@ ohos_shared_library("mindspore_lib") {
   if (SUPPORT_NNRT) {
     if (mindspore_feature_nnrt_metagraph) {
       defines += [ "SUPPORT_NNRT_METAGRAPH" ]
-      sources += [ "src/litert/delegate/nnrt/hiai_foundation_wrapper.cc", ]
+      sources += [
+        "src/litert/delegate/nnrt/hiai_foundation_wrapper.cc",
+        "src/litert/cache_session.cc",
+      ]
       print("enabled feature: mindspore_feature_nnrt_metagraph")
     }
     sources += [
diff --git a/mindspore/lite/src/common/mmap_utils.cc b/mindspore/lite/src/common/mmap_utils.cc
index ca8f8d1e..0dd31f7c 100644
--- a/mindspore/lite/src/common/mmap_utils.cc
+++ b/mindspore/lite/src/common/mmap_utils.cc
@@ -24,7 +24,7 @@
 
 namespace mindspore {
 namespace lite {
-void *ReadFileByMmap(const std::string &file, size_t *size) {
+void *ReadFileByMmap(const std::string &file, size_t *size, bool populate) {
 #if !defined(_WIN32) && !defined(_WIN64) && !defined(MS_COMPILE_IOS)
   auto real_path = RealPath(file.c_str());
   auto fd = open(real_path.c_str(), O_RDONLY);
@@ -39,7 +39,12 @@ void *ReadFileByMmap(const std::string &file, size_t *size) {
     return nullptr;
   }
   *size = fd_stat.st_size;
-  auto mmap_buffers = mmap(nullptr, *size, PROT_READ, MAP_SHARED | MAP_POPULATE, fd, 0);
+  void *mmap_buffers;
+  if (populate) {
+    mmap_buffers = mmap(nullptr, *size, PROT_READ, MAP_SHARED | MAP_POPULATE, fd, 0);
+  } else {
+    mmap_buffers = mmap(nullptr, *size, PROT_READ, MAP_SHARED, fd, 0);
+  }
   close(fd);
   if (mmap_buffers == MAP_FAILED) {
     MS_LOG(ERROR) << "Model mmap failed.";
@@ -54,7 +59,10 @@ void *ReadFileByMmap(const std::string &file, size_t *size) {
 
 void UnmapMmapBuffer(void *buffer, size_t size) {
 #if !defined(_WIN32) && !defined(_WIN64)
-  (void)munmap(buffer, size);
+  auto ret = munmap(buffer, size);
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "munmap failed ret: " << ret << ", err: " << strerror(errno);
+  }
 #else
   MS_LOG(ERROR) << "Mmap is unsupported on windows.";
 #endif
diff --git a/mindspore/lite/src/common/mmap_utils.h b/mindspore/lite/src/common/mmap_utils.h
index bdd7c9a5..d3b0ec5f 100644
--- a/mindspore/lite/src/common/mmap_utils.h
+++ b/mindspore/lite/src/common/mmap_utils.h
@@ -20,7 +20,7 @@
 
 namespace mindspore {
 namespace lite {
-void *ReadFileByMmap(const std::string &file, size_t *size);
+void *ReadFileByMmap(const std::string &file, size_t *size, bool populate = true);
 void UnmapMmapBuffer(void *buffer, size_t size);
 }  // namespace lite
 }  // namespace mindspore
diff --git a/mindspore/lite/src/litert/cache_session.cc b/mindspore/lite/src/litert/cache_session.cc
new file mode 100644
index 00000000..7bafe3f7
--- /dev/null
+++ b/mindspore/lite/src/litert/cache_session.cc
@@ -0,0 +1,425 @@
+/**
+ * Copyright 2024 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "cache_session.h"
+#include "src/common/context_util.h"
+#include "src/common/tensor_util.h"
+#include "src/common/mmap_utils.h"
+#include "src/common/file_utils.h"
+#include "src/litert/delegate/nnrt/nnrt_model_kernel.h"
+
+namespace mindspore {
+namespace lite {
+CacheSession::~CacheSession() {
+  if (nn_executor_ != nullptr) {
+    OH_NNExecutor_Destroy(&nn_executor_);
+    MS_LOG(INFO) << "Destroy NNExecutor finished.";
+  }
+}
+
+int CacheSession::CompileGraph(Model *model) {
+  bool expected = false;
+  if (!is_running_.compare_exchange_strong(expected, true)) {
+    MS_LOG(ERROR) << "Multi-threading is not supported";
+    return RET_ERROR;
+  }
+  // Convert to abstract base model interface
+  auto ret = ConvertInOutTensors(model);
+  context_->set_schema_version(reinterpret_cast<lite::LiteModel *>(model)->GetSchemaVersion());
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "ConvertTensors failed: " << ret;
+    is_running_.store(false);
+    return ret;
+  }
+  InitGraphInputTensors(model);
+  InitGraphOutputTensors(model);
+
+  // create NNRt kernel
+  ret = ScheduleToNNRTKernel();
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Schedule NNRt kernel failed: " << ret;
+    is_running_.store(false);
+    return ret;
+  }
+
+  InitGraphInOutTensorsMap(model);
+  ret = PrepareKernels(model);
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Prepare kernels failed: " << ret;
+    is_running_.store(false);
+    return ret;
+  }
+
+  ret = InitExecutor();
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "InitExecutor failed: " << ret;
+    is_running_.store(false);
+    return ret;
+  }
+
+  MarkSharedWeight(kernels_);
+  FreePackOpWeight(kernels_);
+
+  is_running_.store(false);
+  return RET_OK;
+}
+
+int CacheSession::InitExecutor() {
+  executor_ = new (std::nothrow) Executor();
+  if (executor_ == nullptr) {
+    MS_LOG(ERROR) << "New Executor failed";
+    return RET_ERROR;
+  }
+  auto ret = executor_->Prepare(kernels_, inputs_, outputs_, context_.get());
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Prepare executor failed: " << ret;
+    return ret;
+  }
+  return RET_OK;
+}
+
+int CacheSession::ConvertInOutTensors(const lite::Model *model) {
+  MS_ASSERT(model != nullptr);
+  auto lite_model = reinterpret_cast<const lite::LiteModel *>(model);
+  uint32_t tensor_count = model->graph_.all_tensors_.size();
+  auto model_input_indices = model->graph_.input_indices_;
+  auto model_output_indices = model->graph_.output_indices_;
+
+  for (uint32_t i = 0; i < tensor_count; ++i) {
+    auto *src_tensor = model->graph_.all_tensors_[i];
+    if (!IsContain(model_input_indices, i) && !IsContain(model_output_indices, i)) {
+      this->tensors_.emplace_back(nullptr);
+      continue;
+    }
+    if (src_tensor == nullptr) {
+      MS_LOG(ERROR) << i << "th tensor in model is nullptr";
+      return RET_NULL_PTR;
+    }
+    auto *dst_tensor = ConvertTensor(*src_tensor);
+    if (dst_tensor == nullptr) {
+      MS_LOG(ERROR) << "Convert new " << i << "th tensor failed!";
+      return RET_NULL_PTR;
+    }
+    auto ret = ConvertTensorsData(lite_model, i, dst_tensor);
+    if (ret != RET_OK) {
+      MS_LOG(ERROR) << "Convert data of " << i << "th tensor failed";
+      delete dst_tensor;
+      return ret;
+    }
+    ConvertTensorsQuantParam(src_tensor, dst_tensor);
+    if (IsContain(model_input_indices, i)) {
+      dst_tensor->set_category(Category::GRAPH_INPUT);
+    }
+    if (IsContain(model_output_indices, i)) {
+      // a tensor that is both an input and an output is treated as an input
+      if (!dst_tensor->IsGraphInput()) {
+        dst_tensor->set_category(Category::GRAPH_OUTPUT);
+      }
+    }
+
+    ret = CheckTensorValid(dst_tensor);
+    if (ret != RET_OK) {
+      MS_LOG(ERROR) << "Check " << i << "th tensor failed";
+      delete dst_tensor;
+      return ret;
+    }
+
+    this->tensors_.emplace_back(dst_tensor);
+  }
+  return RET_OK;
+}
+
+int CacheSession::Init(const std::shared_ptr<InnerContext> &context) {
+  if (context == nullptr) {
+    MS_LOG(ERROR) << "context is nullptr";
+    return RET_NULL_PTR;
+  }
+  bool expected = false;
+  if (!is_running_.compare_exchange_strong(expected, true)) {
+    MS_LOG(ERROR) << "Multi-threading is not supported";
+    return RET_ERROR;
+  }
+  context_ = context;
+  auto ret = context_->Init();
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Init Context failed";
+    return ret;
+  }
+  ms_context_ = MSContextFromContext(context);
+  if (ms_context_ == nullptr) {
+    MS_LOG(ERROR) << "transfer context to ms context failed.";
+    return RET_NULL_PTR;
+  }
+
+  auto iter = std::find_if(context_->device_list_.begin(), context_->device_list_.end(),
+                           [](DeviceContext &device) { return device.device_type_ == lite::DT_NNRT; });
+  if (iter == context_->device_list_.end()) {
+    MS_LOG(ERROR) << "Cannot find NNRT device info";
+    return RET_ERROR;
+  }
+  nnrt_device_info_ = iter->device_info_.nnrt_device_info_;
+
+  const auto &extensions = nnrt_device_info_.extensions_;
+  mindspore::lite::nnrt::ExtensionOptionsParser::Parse(extensions, &extension_options_);
+
+  is_running_.store(false);
+  return RET_OK;
+}
+
+int CacheSession::ParseInputOutputFromModelBuffer(const char *model_buf, LiteModel *model) {
+  const void *meta_graph = nullptr;
+  meta_graph = reinterpret_cast<const void *>(schema::GetMetaGraph(model_buf));
+  assert(meta_graph != nullptr);
+
+  auto status = GenerateModelInputOutput(*reinterpret_cast<const schema::MetaGraph *>(meta_graph), model->graph_);
+  if (status != RET_OK) {
+    MS_LOG(ERROR) << "failed to generate model input and output";
+    return status;
+  }
+  model->buf = const_cast<char *>(model_buf);
+  return RET_OK;
+}
+
+int CacheSession::LoadModelAndCompileByPath(const std::string &model_path, mindspore::ModelType model_type) {
+  size_t model_size;
+  bool use_mmap = IsMmapEnable();
+  auto model_buf = LoadModelByPath(model_path, model_type, &model_size, use_mmap);
+  if (model_buf == nullptr) {
+    MS_LOG(ERROR) << "Read model file failed";
+    return RET_ERROR;
+  }
+
+  Model *model = nullptr;
+  if (extension_options_.cache_path_.empty()) {
+    MS_LOG(ERROR) << "cache path is empty";
+    return RET_ERROR;
+  } else {
+    model = ImportInOutFromBuffer(model_buf, model_size, true, model_type, model_path);
+  }
+  if (model == nullptr) {
+    MS_LOG(ERROR) << "Import model failed";
+    return RET_ERROR;
+  }
+  dynamic_cast<LiteModel *>(model)->PrepareInnerTensors();
+
+  if (use_mmap) {
+    reinterpret_cast<LiteModel *>(model)->model_buf_by_mmap_ = true;
+  } else {
+    MS_LOG(WARNING) << "Model buffer is not mmap-backed, memory usage may exceed the expected limit.";
+  }
+  reinterpret_cast<LiteModel *>(model)->set_keep_model_buf(true);
+  auto ret = CompileGraph(model);
+  if (ret != lite::RET_OK) {
+    MS_LOG(ERROR) << "Compile model failed";
+    model->buf = nullptr;
+    delete model;
+    return RET_ERROR;
+  }
+  set_model(model);
+  return RET_OK;
+}
+
+Model *CacheSession::ImportInOutFromBuffer(const char *model_buf, size_t size, bool take_buf,
+                                           mindspore::ModelType model_type, const std::string &path) {
+  MS_LOG(INFO) << "import model from lite model";
+  auto *model = new (std::nothrow) LiteModel(path);
+  if (model == nullptr) {
+    MS_LOG(ERROR) << "new LiteModel failed!";
+    return nullptr;
+  }
+
+  auto status = ParseInputOutputFromModelBuffer(model_buf, model);
+  if (status != RET_OK) {
+    MS_LOG(ERROR) << "construct model failed.";
+    delete model;
+    return nullptr;
+  }
+  model->buf = const_cast<char *>(model_buf);
+  model->buf_size_ = size;
+  return model;
+}
+
+int CacheSession::ScheduleToNNRTKernel() {
+  if (!IsKirinNPUWithOnlineInference(nnrt_device_info_.device_id_)) {
+    MS_LOG(ERROR) << "Only Kirin NPU devices (name prefix \"NPU_\") with online inference are supported.";
+    return RET_ERROR;
+  }
+  auto ret = CreateFullModelKernel();
+  if (ret != kSuccess) {
+    MS_LOG(ERROR) << "Build npu model failed.";
+    return RET_ERROR;
+  }
+  return RET_OK;
+}
+
+bool CacheSession::IsKirinNPUWithOnlineInference(size_t device_id) {
+  const std::string kirin_npu_name_prefix = "NPU_";
+  const char *device_name;
+  auto ret = OH_NNDevice_GetName(device_id, &device_name);
+  if (ret != OH_NN_SUCCESS) {
+    MS_LOG(WARNING) << "Get name of device: " << device_id << " failed, error: " << ret;
+    return false;
+  }
+
+  if (strncmp(kirin_npu_name_prefix.c_str(), device_name, kirin_npu_name_prefix.size()) != 0) {
+    MS_LOG(WARNING) << "Device " << device_id << " is not a Kirin NPU, device_name: " << device_name;
+    return false;
+  }
+  return true;
+}
+
+Status CacheSession::CreateFullModelKernel() {
+  OH_NNCompilation *nn_compilation = OH_NNCompilation_ConstructForCache();
+  if (nn_compilation == nullptr) {
+    MS_LOG(ERROR) << "Construct NNCompilation failed";
+    return kLiteError;
+  }
+  MS_LOG(DEBUG) << "NNRTDelegate creates NNCompilation success.";
+
+  auto ret_code = InitNNCompilation(nn_compilation);
+  if (ret_code != kSuccess) {
+    MS_LOG(ERROR) << "Init NNCompilation failed";
+    OH_NNCompilation_Destroy(&nn_compilation);
+    return kLiteError;
+  }
+
+  OH_NNExecutor *nn_executor = nullptr;
+  nn_executor = OH_NNExecutor_Construct(nn_compilation);
+  if (nn_executor == nullptr) {
+    MS_LOG(ERROR) << "Construct NNExecutor failed";
+    OH_NNCompilation_Destroy(&nn_compilation);
+    return kLiteError;
+  }
+  OH_NNCompilation_Destroy(&nn_compilation);
+
+  ms_inputs_ = LiteTensorsToMSTensors(inputs_);
+  ms_outputs_ = LiteTensorsToMSTensors(outputs_);
+  auto nnrt_model_kernel = new (std::nothrow) NNRTModelKernel(nn_executor, nnrt_device_info_, ms_inputs_, ms_outputs_);
+  if (nnrt_model_kernel == nullptr) {
+    OH_NNExecutor_Destroy(&nn_executor);
+    MS_LOG(ERROR) << "new NNRTModelKernel failed";
+    return kLiteError;
+  }
+  nn_executor_ = nn_executor;
+
+  std::shared_ptr<kernel::Kernel> shared_kernel(nnrt_model_kernel);
+  auto *kernel_exec = new (std::nothrow) kernel::KernelExec(shared_kernel);
+  if (kernel_exec == nullptr) {
+    MS_LOG(ERROR) << "nnrt kernel exec create failed.";
+    return kLiteError;
+  }
+  auto delegate_type = kNumberTypeFloat32;
+  for (auto &input : nnrt_model_kernel->inputs()) {
+    if (static_cast<TypeId>(input.DataType()) == kNumberTypeFloat16) {
+      delegate_type = kNumberTypeFloat16;
+      break;
+    }
+  }
+  kernel::KernelKey delegate_desc{kernel::kDelegate, delegate_type, NHWC, schema::PrimitiveType_NONE, "", ""};
+  kernel_exec->set_desc(delegate_desc);
+  kernel_exec->set_context(context_.get());
+  kernels_.push_back(kernel_exec);
+
+  return kSuccess;
+}
+
+Status CacheSession::InitNNCompilation(OH_NNCompilation *nn_compilation) const {
+  auto ret_code = OH_NNCompilation_SetDevice(nn_compilation, nnrt_device_info_.device_id_);
+  if (ret_code != OH_NN_SUCCESS) {
+    MS_LOG(ERROR) << "NNCompilation set device id failed, ret: " << ret_code;
+    return kLiteError;
+  }
+  ret_code = OH_NNCompilation_SetPerformanceMode(nn_compilation,
+                                                 (OH_NN_PerformanceMode)(nnrt_device_info_.performance_mode_));
+  if ((ret_code != OH_NN_SUCCESS) && (ret_code != OH_NN_OPERATION_FORBIDDEN)) {
+    MS_LOG(ERROR) << "NNCompilation set performance mode failed, ret: " << ret_code;
+    return kLiteError;
+  }
+  ret_code = OH_NNCompilation_SetPriority(nn_compilation, (OH_NN_Priority)(nnrt_device_info_.priority_));
+  if ((ret_code != OH_NN_SUCCESS) && (ret_code != OH_NN_OPERATION_FORBIDDEN)) {
+    MS_LOG(ERROR) << "NNCompilation set priority failed, ret: " << ret_code;
+    return kLiteError;
+  }
+  ret_code = OH_NNCompilation_EnableFloat16(nn_compilation, nnrt_device_info_.enable_fp16_);
+  if ((ret_code != OH_NN_SUCCESS) && (ret_code != OH_NN_OPERATION_FORBIDDEN)) {
+    MS_LOG(ERROR) << "NNCompilation enable fp16 failed, ret: " << ret_code;
+    return kLiteError;
+  }
+
+  if (!extension_options_.cache_path_.empty()) {
+    ret_code = OH_NNCompilation_SetCache(nn_compilation, extension_options_.cache_path_.c_str(),
+                                         extension_options_.cache_version_);
+    if ((ret_code != OH_NN_SUCCESS) && (ret_code != OH_NN_OPERATION_FORBIDDEN)) {
+      MS_LOG(ERROR) << "NNCompilation set cache failed, ret: " << ret_code;
+      return kLiteError;
+    }
+  } else {
+    MS_LOG(ERROR) << "NNCompilation must set Cache.";
+    return kLiteError;
+  }
+
+  size_t extension_size = nnrt_device_info_.extensions_.size();
+  for (size_t i = 0; i < extension_size; i++) {
+    auto &src_extension = nnrt_device_info_.extensions_[i];
+    ret_code = OH_NNCompilation_AddExtensionConfig(nn_compilation, src_extension.name.c_str(),
+                                                   (char *)((void *)src_extension.value.data()),
+                                                   src_extension.value.size());
+    if (ret_code != OH_NN_SUCCESS) {
+      MS_LOG(ERROR) << "OH_NNCompilation_AddExtensionConfig " << i << ": " << src_extension.name << " failed, ret: "
+                    << ret_code;
+      return kLiteError;
+    }
+  }
+
+  ret_code = OH_NNCompilation_Build(nn_compilation);
+  if (ret_code != OH_NN_SUCCESS) {
+    MS_LOG(ERROR) << "Build NNCompilation failed, ret: " << ret_code;
+    return kLiteError;
+  }
+  return kSuccess;
+}
+
+const char *CacheSession::LoadModelByPath(const std::string &file, mindspore::ModelType model_type, size_t *size,
+                                          bool use_mmap) {
+  size_t buf_size;
+  char *model_buf;
+  if (use_mmap) {
+    model_buf = reinterpret_cast<char *>(lite::ReadFileByMmap(file.c_str(), &buf_size, false));
+  } else {
+    MS_LOG(WARNING) << "Model buffer is not mmap-backed, memory usage may exceed the expected limit.";
+    model_buf = lite::ReadFile(file.c_str(), &buf_size);
+  }
+  if (model_buf == nullptr) {
MS_LOG(ERROR) << "The model path is invalid"; + return model_buf; + } + + char *lite_buf = nullptr; + auto buf_model_type = LoadModelByBuff(model_buf, buf_size, &lite_buf, size, model_type); + if (buf_model_type == mindspore::ModelType::kUnknownType || lite_buf == nullptr) { + if (use_mmap) { + lite::UnmapMmapBuffer(const_cast(static_cast(model_buf)), buf_size); + } else { + delete[] model_buf; + } + model_buf = nullptr; + return nullptr; + } + + return lite_buf; +} +} // namespace lite +} // namespace mindspore diff --git a/mindspore/lite/src/litert/cache_session.h b/mindspore/lite/src/litert/cache_session.h new file mode 100644 index 00000000..f0ae185a --- /dev/null +++ b/mindspore/lite/src/litert/cache_session.h @@ -0,0 +1,129 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_LITE_SRC_LITERT_CACHE_SESSION_H_ +#define MINDSPORE_LITE_SRC_LITERT_CACHE_SESSION_H_ + +#include "src/litert/lite_session.h" +#include "src/litert/inner_context.h" +#include "src/litert/lite_model.h" +#include "src/litert/delegate/nnrt/extension_options_parser.h" +#include "interfaces/kits/c/neural_network_runtime/neural_network_runtime_type.h" +#include "interfaces/kits/c/neural_network_runtime/neural_network_runtime.h" +#include "interfaces/innerkits/c/neural_network_runtime_inner.h" + +namespace mindspore { +namespace lite { +class CacheSession : public LiteSession { + public: + CacheSession() = default; + ~CacheSession() override; + int Init(const std::shared_ptr &context) override; + int CompileGraph(Model *model) override; + int LoadModelAndCompileByPath(const std::string &model_path, mindspore::ModelType model_type) override; + static bool IsKirinNPUWithOnlineInference(size_t device_id); + const char *LoadModelByPath(const std::string &file, mindspore::ModelType model_type, size_t *size, + bool use_mmap) override; + Model* ImportInOutFromBuffer(const char *model_buf, size_t size, bool take_buf, + mindspore::ModelType model_type = mindspore::ModelType::kMindIR_Lite, + const std::string &path = ""); + + template + bool ConvertInputOutputTensors(const T &meta_graph, LiteGraph &graph_) { + if (meta_graph.allTensors() == nullptr) { + MS_LOG(ERROR) << "meta_graph is invalid, please check your model file."; + return false; + } + + graph_.all_tensors_.resize(meta_graph.allTensors()->size()); + MS_LOG(INFO) << "convert input/output tensors"; + for (auto i: graph_.input_indices_) { + auto *tensor = meta_graph.allTensors()->template GetAs(i); + if (tensor == nullptr) { + MS_LOG(ERROR) << i << " the input tensor in metagraph is nullptr"; + return false; + } + MS_CHECK_TRUE_RET(tensor->format() >= schema::Format_MIN && tensor->format() <= schema::Format_MAX, false); + graph_.all_tensors_[i] = (const_cast(tensor)); + } + + for (auto i: graph_.output_indices_) { + auto *tensor = meta_graph.allTensors()->template GetAs(i); + if (tensor == nullptr) { + MS_LOG(ERROR) << i << " the output tensor in metagraph is nullptr"; + } + 
+      MS_CHECK_TRUE_RET(tensor->format() >= schema::Format_MIN && tensor->format() <= schema::Format_MAX, false);
+      graph_.all_tensors_[i] = const_cast<schema::Tensor *>(tensor);
+    }
+    return true;
+  }
+
+  template <typename T>
+  int GenerateModelInputOutput(const T &meta_graph, LiteGraph &graph_) {
+    if (meta_graph.name() != nullptr) {
+      graph_.name_ = meta_graph.name()->c_str();
+    }
+    if (meta_graph.version() != nullptr) {
+      graph_.version_ = meta_graph.version()->c_str();
+    }
+
+    if (meta_graph.inputIndex() == nullptr || meta_graph.outputIndex() == nullptr ||
+        meta_graph.allTensors() == nullptr) {
+      MS_LOG(ERROR) << "meta_graph is invalid, please check your model file.";
+      return RET_ERROR;
+    }
+
+    // convert input and output indices
+    auto in_count = meta_graph.inputIndex()->size();
+    for (uint32_t i = 0; i < in_count; ++i) {
+      graph_.input_indices_.push_back(meta_graph.inputIndex()->Get(i));
+    }
+    auto out_count = meta_graph.outputIndex()->size();
+    for (uint32_t i = 0; i < out_count; ++i) {
+      graph_.output_indices_.push_back(meta_graph.outputIndex()->Get(i));
+    }
+
+    if (!ConvertInputOutputTensors(meta_graph, graph_)) {
+      MS_LOG(ERROR) << "convert tensor failed";
+      return RET_ERROR;
+    }
+    return RET_OK;
+  }
+
+  int ParseInputOutputFromModelBuffer(const char *model_buf, LiteModel *model);
+  int BindGLTexture2DMemory(const std::map<std::string, unsigned int> &inputGLTexture,
+                            std::map<std::string, unsigned int> *outputGLTexture) override {
+    return RET_ERROR;
+  }
+
+ protected:
+  int ScheduleToNNRTKernel();
+  Status CreateFullModelKernel();
+  Status InitNNCompilation(OH_NNCompilation *nn_compilation) const;
+  int ConvertInOutTensors(const lite::Model *model);
+  int InitExecutor() override;
+  std::vector<mindspore::MSTensor> ms_inputs_;
+  std::vector<mindspore::MSTensor> ms_outputs_;
+
+ private:
+  NNRtDeviceInfo nnrt_device_info_;
+  OH_NNExecutor *nn_executor_{nullptr};
+  nnrt::ExtensionOptions extension_options_;
+};
+}  // namespace lite
+}  // namespace mindspore
+
+#endif  // MINDSPORE_LITE_SRC_LITERT_CACHE_SESSION_H_
diff --git a/mindspore/lite/src/litert/cxx_api/model/model_impl.cc b/mindspore/lite/src/litert/cxx_api/model/model_impl.cc
index 02533dc3..cacbf86e 100644
--- a/mindspore/lite/src/litert/cxx_api/model/model_impl.cc
+++ b/mindspore/lite/src/litert/cxx_api/model/model_impl.cc
@@ -39,6 +39,11 @@
 #include "src/common/config_file.h"
 #include "src/litert/cpu_info.h"
 #include "src/litert/pack_weight_manager.h"
+#ifdef SUPPORT_NNRT_METAGRAPH
+#include "src/litert/cache_session.h"
+#include "src/litert/delegate/nnrt/extension_options_parser.h"
+#endif
+
 namespace mindspore {
 namespace {
 const char *const kExecutionPlan = "execution_plan";
@@ -1006,7 +1011,36 @@ float ModelImpl::GetLearningRate() {
 }
 
 lite::LiteSession *ModelImpl::CreateLiteSession(const std::shared_ptr<lite::InnerContext> &context) {
-  auto session = new (std::nothrow) lite::LiteSession();
+  if (context == nullptr) {
+    MS_LOG(ERROR) << "context is nullptr";
+    return nullptr;
+  }
+  lite::LiteSession *session = nullptr;
+#ifdef SUPPORT_NNRT_METAGRAPH
+  auto iter = std::find_if(context->device_list_.begin(), context->device_list_.end(),
+                           [](lite::DeviceContext &device) { return device.device_type_ == lite::DT_NNRT; });
+  if (iter != context->device_list_.end()) {
+    const auto &nnrt_device_info = iter->device_info_.nnrt_device_info_;
+    if (lite::CacheSession::IsKirinNPUWithOnlineInference(nnrt_device_info.device_id_)) {
+      const auto &extensions = nnrt_device_info.extensions_;
+      lite::nnrt::ExtensionOptions extension_options;
+      mindspore::lite::nnrt::ExtensionOptionsParser::Parse(extensions, &extension_options);
+      auto has_cache = OH_NNModel_HasCache(extension_options.cache_path_.c_str(), extension_options.model_name.c_str(),
+                                           extension_options.cache_version_);
+      if (has_cache) {
+        session = reinterpret_cast<lite::LiteSession *>(new (std::nothrow) lite::CacheSession());
+        if (session == nullptr) {
+          MS_LOG(ERROR) << "create cache session failed";
+          return nullptr;
+        }
+      }
+    }
+  }
+#endif
+
+  if (session == nullptr) {
+    session = new (std::nothrow) lite::LiteSession();
+  }
   if (session == nullptr) {
     MS_LOG(ERROR) << "create session failed";
     return nullptr;
diff --git a/mindspore/lite/src/litert/delegate/nnrt/extension_options_parser.cc b/mindspore/lite/src/litert/delegate/nnrt/extension_options_parser.cc
index e35cc2a5..a66cd5ea 100644
--- a/mindspore/lite/src/litert/delegate/nnrt/extension_options_parser.cc
+++ b/mindspore/lite/src/litert/delegate/nnrt/extension_options_parser.cc
@@ -30,6 +30,7 @@ const std::string kCachePath = "CachePath";
 const std::string kCacheVersion = "CacheVersion";
 const std::string kBandMode = "BandMode";
 const std::string kQuantConfigData = "QuantConfigData";
+const std::string kModelName = "ModelName";
 }  // namespace
 
 int ExtensionOptionsParser::Parse(const std::vector<Extension> &extensions, ExtensionOptions *param) {
@@ -39,6 +40,7 @@ int ExtensionOptionsParser::Parse(const std::vector<Extension> &extensions, Exte
   DoParseCacheVersion(extensions, &param->cache_version_);
   DoParseBondMode(extensions, &param->band_mode);
   DoParseQuantConfig(extensions, &param->quant_config, &param->quant_config_size, &param->is_optional_quant_setted);
+  DoParseModelName(extensions, &param->model_name);
   return RET_OK;
 }
 
@@ -89,4 +91,14 @@ void ExtensionOptionsParser::DoParseQuantConfig(const std::vector<Extension> &ex
     *quant_setted = true;
   }
 }
+
+void ExtensionOptionsParser::DoParseModelName(const std::vector<Extension> &extensions, std::string *model_name) {
+  MS_CHECK_TRUE_RET_VOID(model_name != nullptr);
+  auto iter_config = std::find_if(extensions.begin(), extensions.end(), [](const Extension &extension) {
+    return extension.name == kModelName;
+  });
+  if (iter_config != extensions.end()) {
+    *model_name = std::string(iter_config->value.begin(), iter_config->value.end());
+  }
+}
 }  // mindspore::lite::nnrt
\ No newline at end of file
diff --git a/mindspore/lite/src/litert/delegate/nnrt/extension_options_parser.h b/mindspore/lite/src/litert/delegate/nnrt/extension_options_parser.h
index f24682ce..9a030ad6 100644
--- a/mindspore/lite/src/litert/delegate/nnrt/extension_options_parser.h
+++ b/mindspore/lite/src/litert/delegate/nnrt/extension_options_parser.h
@@ -29,6 +29,7 @@ struct ExtensionOptions {
   void *quant_config;
   size_t quant_config_size = 0;
   bool is_optional_quant_setted = false;
+  std::string model_name = "";
 };
 
 class ExtensionOptionsParser {
@@ -41,6 +42,7 @@ private:
                            bool *quant_setted);
   static void DoParseCachePath(const std::vector<Extension> &extensions, std::string *cache_path);
   static void DoParseCacheVersion(const std::vector<Extension> &extensions, uint32_t *cache_version);
+  static void DoParseModelName(const std::vector<Extension> &extensions, std::string *model_name);
 };
 
 }  // namespace mindspore::lite::nnrt
diff --git a/mindspore/lite/src/litert/lite_model.cc b/mindspore/lite/src/litert/lite_model.cc
index 006bc02c..5acf5760 100644
--- a/mindspore/lite/src/litert/lite_model.cc
+++ b/mindspore/lite/src/litert/lite_model.cc
@@ -538,14 +538,16 @@ bool LiteModel::PrepareInnerTensors() {
       MS_LOG(ERROR) << "Create SchemaTensorWrapper return nullptr";
       return false;
     }
+    if (graph_.all_tensors_.at(i) != nullptr) {
 #ifdef ENABLE_LITE_HELPER
-    if (!tensor_wrapper->Init(*(graph_.all_tensors_.at(i)), static_cast<SCHEMA_VERSION>(schema_version_), dir,
-                              infer_helpers)) {
+      if (!tensor_wrapper->Init(*(graph_.all_tensors_.at(i)), static_cast<SCHEMA_VERSION>(schema_version_), dir,
+                                infer_helpers)) {
 #else
-    if (!tensor_wrapper->Init(*(graph_.all_tensors_.at(i)), static_cast<SCHEMA_VERSION>(schema_version_), dir)) {
+      if (!tensor_wrapper->Init(*(graph_.all_tensors_.at(i)), static_cast<SCHEMA_VERSION>(schema_version_), dir)) {
 #endif
-      delete tensor_wrapper;
-      return false;
+        delete tensor_wrapper;
+        return false;
+      }
     }
     this->inner_all_tensors_[i] = tensor_wrapper;
   }
diff --git a/mindspore/lite/src/litert/lite_model.h b/mindspore/lite/src/litert/lite_model.h
index 647746a2..c0847c1e 100644
--- a/mindspore/lite/src/litert/lite_model.h
+++ b/mindspore/lite/src/litert/lite_model.h
@@ -66,13 +66,13 @@ class MS_API LiteModel : public Model {
 
   static int VersionVerify(flatbuffers::Verifier *verify);
 
- private:
 #ifdef ENABLE_LITE_HELPER
   bool PrepareInnerTensors(mindspore::infer::helper::InferHelpers *infer_helpers = nullptr);
 #else
   bool PrepareInnerTensors();
 #endif
 
+ private:
   bool CheckQuantAllInit(const flatbuffers::Vector<flatbuffers::Offset<schema::QuantParam>> *quant_params);
 
   template <typename T = schema::MetaGraph>
diff --git a/mindspore/lite/src/litert/lite_session.h b/mindspore/lite/src/litert/lite_session.h
index 64a5f6d3..487b382a 100644
--- a/mindspore/lite/src/litert/lite_session.h
+++ b/mindspore/lite/src/litert/lite_session.h
@@ -57,10 +57,10 @@ class MS_API LiteSession {
 #else
   int LoadModelAndCompileByBuf(const char *model_buf, mindspore::ModelType model_type, const size_t &buf_size);
 #endif
-  int LoadModelAndCompileByPath(const std::string &model_path, mindspore::ModelType model_type);
+  virtual int LoadModelAndCompileByPath(const std::string &model_path, mindspore::ModelType model_type);
   mindspore::ModelType LoadModelByBuff(const char *model_buf, const size_t &buf_size, char **lite_buf, size_t *size,
                                        mindspore::ModelType model_type);
-  const char *LoadModelByPath(const std::string &file, mindspore::ModelType model_type, size_t *size, bool use_mmap);
+  virtual const char *LoadModelByPath(const std::string &file, mindspore::ModelType model_type, size_t *size, bool use_mmap);
   virtual int Init(const std::shared_ptr<InnerContext> &context);
   virtual void BindThread(bool if_bind);
   virtual int CompileGraph(Model *model);
@@ -168,10 +168,10 @@ class MS_API LiteSession {
   static void MarkSharedWeight(const std::vector<kernel::KernelExec *> &kernels);
   std::string ParseWeightPath();
   bool IsMmapEnable();
+  virtual int InitExecutor();
 
  private:
   int PreCheck(Model *model);
-  int InitExecutor();
   void ResetInputsShape(const std::vector<std::vector<int>> &dims);
   int ContextInit(const std::shared_ptr<InnerContext> &context);
   int CreateTensorRTDelegate();
-- 
2.17.1
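
Usage note (reviewer sketch, not part of the patch): the new "ModelName" key parsed above travels through the
same name/value extension list as "CachePath" and "CacheVersion" on the NNRT device info. Below is a minimal
sketch of how these keys reach the parser, assuming Extension is the {name, byte-vector value} pair used by
the NNRT delegate; the cache directory and model name are placeholder values.

  // Sketch only: build the extension list that ExtensionOptionsParser::Parse() consumes.
  // "/data/app/cache" and "demo_model" are hypothetical values.
  std::vector<mindspore::lite::Extension> extensions;
  auto add_option = [&extensions](const std::string &key, const std::string &value) {
    // assumes Extension is an aggregate of {std::string name; std::vector<uint8_t> value;}
    extensions.push_back({key, std::vector<uint8_t>(value.begin(), value.end())});
  };
  add_option("CachePath", "/data/app/cache");
  add_option("CacheVersion", "1");
  add_option("ModelName", "demo_model");

  mindspore::lite::nnrt::ExtensionOptions options;
  mindspore::lite::nnrt::ExtensionOptionsParser::Parse(extensions, &options);
  // options.model_name is what CreateLiteSession() now passes to OH_NNModel_HasCache(),
  // together with options.cache_path_ and options.cache_version_.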