From d8ae073cf0e9f10fcbd129a145927492f65edcca Mon Sep 17 00:00:00 2001 From: Zhu Guodong Date: Tue, 30 May 2023 12:17:40 +0800 Subject: [PATCH] auto-apply 0010-nnrt-delegate-supports-heterogeneous-predition.patch --- include/api/context.h | 40 ++ include/c_api/context_c.h | 85 +++ include/c_api/types_c.h | 36 + include/sdk_api/context.h | 85 +++ include/sdk_api/types.h | 38 +- mindspore/lite/include/context.h | 4 + mindspore/lite/include/model.h | 4 + mindspore/lite/src/runtime/c_api/context_c.cc | 196 ++++++ mindspore/lite/src/runtime/c_api/context_c.h | 3 + .../lite/src/runtime/c_api/type_c_private.h | 40 ++ mindspore/lite/src/runtime/cxx_api/context.cc | 68 ++ .../lite/src/runtime/cxx_api/converters.cc | 15 +- .../lite/src/runtime/cxx_api/converters.h | 2 +- .../runtime/delegate/nnrt/nnrt_delegate.cc | 633 +++++++++++++----- .../src/runtime/delegate/nnrt/nnrt_delegate.h | 54 +- .../src/runtime/delegate/nnrt/nnrt_stub.cc | 21 + mindspore/lite/src/runtime/lite_model.cc | 29 + mindspore/lite/src/runtime/lite_session.cc | 9 +- mindspore/lite/src/runtime/tensor_category.cc | 4 + mindspore/lite/src/runtime/tensor_category.h | 1 + mindspore/lite/test/CMakeLists.txt | 10 +- .../nnrt_delegate/nnrt_delegate_tests.cc | 59 ++ 22 files changed, 1229 insertions(+), 207 deletions(-) create mode 100644 mindspore/lite/src/runtime/c_api/type_c_private.h create mode 100644 mindspore/lite/test/ut/src/runtime/nnrt_delegate/nnrt_delegate_tests.cc diff --git a/include/api/context.h b/include/api/context.h index d88b9d44..4c25aa10 100644 --- a/include/api/context.h +++ b/include/api/context.h @@ -519,6 +519,46 @@ class MS_API NNRTDeviceInfo : public DeviceInfoContext { /// /// \return Type of this DeviceInfoContext. enum DeviceType GetDeviceType() const override { return DeviceType::kNNRt; }; + + /// \brief Set device id. + /// + /// \param[in] device_id The device id. + void SetDeviceID(size_t device_id); + + /// \brief Get the device id. + /// + /// \return The device id. + size_t GetDeviceID() const; + + /// \brief Set performance mode. + /// + /// \param[in] performance_mode The performance mode. + void SetPerformanceMode(int performance_mode); + + /// \brief Get performance mode. + /// + /// \return The priority. + int GetPerformanceMode() const; + + /// \brief Set priority. + /// + /// \param[in] priority The priority. + void SetPriority(int priority); + + /// \brief Get priority. + /// + /// \return The priority. + int GetPriority() const; + + /// \brief Set enables to perform the float16 inference + /// + /// \param[in] is_fp16 Enable float16 inference or not. + void SetEnableFP16(bool is_fp16); + + /// \brief Get enables to perform the float16 inference + /// + /// \return Whether enable float16 inference. + bool GetEnableFP16() const; }; } // namespace mindspore #endif // MINDSPORE_INCLUDE_API_CONTEXT_H diff --git a/include/c_api/context_c.h b/include/c_api/context_c.h index 53839e80..09220f20 100644 --- a/include/c_api/context_c.h +++ b/include/c_api/context_c.h @@ -173,6 +173,91 @@ OH_AI_API void OH_AI_DeviceInfoSetFrequency(OH_AI_DeviceInfoHandle device_info, /// \return NPU frequency OH_AI_API int OH_AI_DeviceInfoGetFrequency(const OH_AI_DeviceInfoHandle device_info); +/// \brief Obtain the all device descriptions in NNRT. +/// +/// \param[out] num Number of NNRT device description. +/// +/// \return NNRT device description array. +OH_AI_API NNRTDeviceDesc *OH_AI_GetAllNNRTDeviceDescs(size_t *num); + +/// \brief Destroy the NNRT device descriptions returned by OH_AI_GetAllNNRTDeviceDescs(). 
+/// +/// \param[in] desc NNRT device description array. +OH_AI_API void OH_AI_DestroyAllNNRTDeviceDescs(NNRTDeviceDesc **desc); + +/// \brief Obtain the device id in NNRT device description. +/// +/// \param[in] desc pointer to the NNRT device description instance. +/// +/// \return NNRT device id. +OH_AI_API size_t OH_AI_GetDeviceIdFromNNRTDeviceDesc(const NNRTDeviceDesc *desc); + +/// \brief Obtain the device name in NNRT device description. +/// +/// \param[in] desc pointer to the NNRT device description instance. +/// +/// \return NNRT device name. +OH_AI_API const char *OH_AI_GetNameFromNNRTDeviceDesc(const NNRTDeviceDesc *desc); + +/// \brief Obtain the device type in NNRT device description. +/// +/// \param[in] desc pointer to the NNRT device description instance. +/// +/// \return NNRT device type. +OH_AI_API OH_AI_NNRTDeviceType OH_AI_GetTypeFromNNRTDeviceDesc(const NNRTDeviceDesc *desc); + +/// \brief Create the NNRT device info by exactly matching the specific device name. +/// +/// \param[in] name NNRt device name. +/// +/// \return Device info object handle. +OH_AI_API OH_AI_DeviceInfoHandle OH_AI_CreateNNRTDeviceInfoByName(const char *name); + +/// \brief Create the NNRT device info by finding the first device with the specific device type. +/// +/// \param[in] name NNRt device type. +/// +/// \return Device info object handle. +OH_AI_API OH_AI_DeviceInfoHandle OH_AI_CreateNNRTDeviceInfoByType(OH_AI_NNRTDeviceType type); + +/// \brief Set the NNRT device id, Only valid for NNRT. +/// +/// \param[in] device_info Device info object handle. +/// \param[in] device_id NNRT device id. +OH_AI_API void OH_AI_DeviceInfoSetDeviceId(OH_AI_DeviceInfoHandle device_info, size_t device_id); + +/// \brief Obtain the NNRT device id, Only valid for NNRT. +/// +/// \param[in] device_info Device info object handle. +/// +/// \return NNRT device id. +OH_AI_API size_t OH_AI_DeviceInfoGetDeviceId(const OH_AI_DeviceInfoHandle device_info); + +/// \brief Set the NNRT performance mode, Only valid for NNRT. +/// +/// \param[in] device_info Device info object handle. +/// \param[in] device_id NNRT performance mode. +OH_AI_API void OH_AI_DeviceInfoSetPerformanceMode(OH_AI_DeviceInfoHandle device_info, OH_AI_PerformanceMode mode); + +/// \brief Obtain the NNRT performance mode, Only valid for NNRT. +/// +/// \param[in] device_info Device info object handle. +/// +/// \return NNRT performance mode. +OH_AI_API OH_AI_PerformanceMode OH_AI_DeviceInfoGetPerformanceMode(const OH_AI_DeviceInfoHandle device_info); + +/// \brief Set the NNRT priority, Only valid for NNRT. +/// +/// \param[in] device_info Device info object handle. +/// \param[in] device_id NNRT priority. +OH_AI_API void OH_AI_DeviceInfoSetPriority(OH_AI_DeviceInfoHandle device_info, OH_AI_Priority priority); + +/// \brief Obtain the NNRT priority, Only valid for NNRT. +/// +/// \param[in] device_info Device info object handle. +/// +/// \return NNRT priority. 
+OH_AI_API OH_AI_Priority OH_AI_DeviceInfoGetPriority(const OH_AI_DeviceInfoHandle device_info); #ifdef __cplusplus } #endif diff --git a/include/c_api/types_c.h b/include/c_api/types_c.h index fdf91f5a..d612eb97 100644 --- a/include/c_api/types_c.h +++ b/include/c_api/types_c.h @@ -44,6 +44,42 @@ typedef enum OH_AI_DeviceType { OH_AI_DEVICETYPE_INVALID = 100, } OH_AI_DeviceType; +typedef enum OH_AI_NNRTDeviceType { + /** Devices that are not CPU, GPU, or dedicated accelerator */ + OH_AI_NNRTDEVICE_OTHERS = 0, + /** CPU device */ + OH_AI_NNRTDEVICE_CPU = 1, + /** GPU device */ + OH_AI_NNRTDEVICE_GPU = 2, + /** Dedicated hardware accelerator */ + OH_AI_NNRTDEVICE_ACCELERATOR = 3, +} OH_AI_NNRTDeviceType; + +typedef enum OH_AI_PerformanceMode { + /** No performance mode preference */ + OH_AI_PERFORMANCE_NONE = 0, + /** Low power consumption mode*/ + OH_AI_PERFORMANCE_LOW = 1, + /** Medium performance mode */ + OH_AI_PERFORMANCE_MEDIUM = 2, + /** High performance mode */ + OH_AI_PERFORMANCE_HIGH = 3, + /** Ultimate performance mode */ + OH_AI_PERFORMANCE_EXTREME = 4 +} OH_AI_PerformanceMode; + +typedef enum OH_AI_Priority { + /** No priority preference */ + OH_AI_PRIORITY_NONE = 0, + /** Low priority */ + OH_AI_PRIORITY_LOW = 1, + /** Medium priority */ + OH_AI_PRIORITY_MEDIUM = 2, + /** High priority */ + OH_AI_PRIORITY_HIGH = 3 +} OH_AI_Priority; + +typedef struct NNRTDeviceDesc NNRTDeviceDesc; #ifdef __cplusplus } #endif diff --git a/include/sdk_api/context.h b/include/sdk_api/context.h index 5bfc9279..bf0ff0a6 100644 --- a/include/sdk_api/context.h +++ b/include/sdk_api/context.h @@ -174,6 +174,91 @@ OH_AI_API void OH_AI_DeviceInfoSetFrequency(OH_AI_DeviceInfoHandle device_info, /// \return NPU frequency OH_AI_API int OH_AI_DeviceInfoGetFrequency(const OH_AI_DeviceInfoHandle device_info); +/// \brief Obtain the all device descriptions in NNRT. +/// +/// \param[out] num Number of NNRT device description. +/// +/// \return NNRT device description array. +OH_AI_API NNRTDeviceDesc *OH_AI_GetAllNNRTDeviceDescs(size_t *num); + +/// \brief Destroy the NNRT device descriptions returned by OH_AI_NNRTGetAllDeviceDescs(). +/// +/// \param[in] desc NNRT device description array. +OH_AI_API void OH_AI_DestroyAllNNRTDeviceDescs(NNRTDeviceDesc **desc); + +/// \brief Obtain the device id in NNRT device description. +/// +/// \param[in] desc pointer to the NNRT device description instance. +/// +/// \return NNRT device id. +OH_AI_API size_t OH_AI_GetDeviceIdFromNNRTDeviceDesc(const NNRTDeviceDesc *desc); + +/// \brief Obtain the device name in NNRT device description. +/// +/// \param[in] desc pointer to the NNRT device description instance. +/// +/// \return NNRT device name. +OH_AI_API const char *OH_AI_GetNameFromNNRTDeviceDesc(const NNRTDeviceDesc *desc); + +/// \brief Obtain the device type in NNRT device description. +/// +/// \param[in] desc pointer to the NNRT device description instance. +/// +/// \return NNRT device type. +OH_AI_API OH_AI_NNRTDeviceType OH_AI_GetTypeFromNNRTDeviceDesc(const NNRTDeviceDesc *desc); + +/// \brief Create the NNRT device info by exactly matching the specific device name. +/// +/// \param[in] name NNRt device name. +/// +/// \return Device info object handle. +OH_AI_API OH_AI_DeviceInfoHandle OH_AI_CreateNNRTDeviceInfoByName(const char *name); + +/// \brief Create the NNRT device info by finding the first device with the specific device type. +/// +/// \param[in] name NNRt device type. +/// +/// \return Device info object handle. 
+OH_AI_API OH_AI_DeviceInfoHandle OH_AI_CreateNNRTDeviceInfoByType(OH_AI_NNRTDeviceType type); + +/// \brief Set the NNRT device id, Only valid for NNRT. +/// +/// \param[in] device_info Device info object handle. +/// \param[in] device_id NNRT device id. +OH_AI_API void OH_AI_DeviceInfoSetDeviceId(OH_AI_DeviceInfoHandle device_info, size_t device_id); + +/// \brief Obtain the NNRT device id, Only valid for NNRT. +/// +/// \param[in] device_info Device info object handle. +/// +/// \return NNRT device id. +OH_AI_API size_t OH_AI_DeviceInfoGetDeviceId(const OH_AI_DeviceInfoHandle device_info); + +/// \brief Set the NNRT performance mode, Only valid for NNRT. +/// +/// \param[in] device_info Device info object handle. +/// \param[in] device_id NNRT performance mode. +OH_AI_API void OH_AI_DeviceInfoSetPerformanceMode(OH_AI_DeviceInfoHandle device_info, OH_AI_PerformanceMode mode); + +/// \brief Obtain the NNRT performance mode, Only valid for NNRT. +/// +/// \param[in] device_info Device info object handle. +/// +/// \return NNRT performance mode. +OH_AI_API OH_AI_PerformanceMode OH_AI_DeviceInfoGetPerformanceMode(const OH_AI_DeviceInfoHandle device_info); + +/// \brief Set the NNRT priority, Only valid for NNRT. +/// +/// \param[in] device_info Device info object handle. +/// \param[in] device_id NNRT priority. +OH_AI_API void OH_AI_DeviceInfoSetPriority(OH_AI_DeviceInfoHandle device_info, OH_AI_Priority priority); + +/// \brief Obtain the NNRT priority, Only valid for NNRT. +/// +/// \param[in] device_info Device info object handle. +/// +/// \return NNRT priority. +OH_AI_API OH_AI_Priority OH_AI_DeviceInfoGetPriority(const OH_AI_DeviceInfoHandle device_info); #ifdef __cplusplus } #endif diff --git a/include/sdk_api/types.h b/include/sdk_api/types.h index a39c6daa..d38660b0 100644 --- a/include/sdk_api/types.h +++ b/include/sdk_api/types.h @@ -40,10 +40,46 @@ typedef enum OH_AI_DeviceType { OH_AI_DEVICETYPE_KIRIN_NPU, // add new type here // ohos-only device range: [60, 80) - OH_AI_DeviceType_NNRT = 60, + OH_AI_DEVICETYPE_NNRT = 60, OH_AI_DEVICETYPE_INVALID = 100, } OH_AI_DeviceType; +typedef enum OH_AI_NNRTDeviceType { + /** Devices that are not CPU, GPU, or dedicated accelerator */ + OH_AI_NNRTDEVICE_OTHERS = 0, + /** CPU device */ + OH_AI_NNRTDEVICE_CPU = 1, + /** GPU device */ + OH_AI_NNRTDEVICE_GPU = 2, + /** Dedicated hardware accelerator */ + OH_AI_NNRTDEVICE_ACCELERATOR = 3, +} OH_AI_NNRTDeviceType; + +typedef enum OH_AI_PerformanceMode { + /** No performance mode preference */ + OH_AI_PERFORMANCE_NONE = 0, + /** Low power consumption mode*/ + OH_AI_PERFORMANCE_LOW = 1, + /** Medium performance mode */ + OH_AI_PERFORMANCE_MEDIUM = 2, + /** High performance mode */ + OH_AI_PERFORMANCE_HIGH = 3, + /** Ultimate performance mode */ + OH_AI_PERFORMANCE_EXTREME = 4 +} OH_AI_PerformanceMode; + +typedef enum OH_AI_Priority { + /** No priority preference */ + OH_AI_PRIORITY_NONE = 0, + /** Low priority */ + OH_AI_PRIORITY_LOW = 1, + /** Medium priority */ + OH_AI_PRIORITY_MEDIUM = 2, + /** High priority */ + OH_AI_PRIORITY_HIGH = 3 +} OH_AI_Priority; + +typedef struct NNRTDeviceDesc NNRTDeviceDesc; #ifdef __cplusplus } #endif diff --git a/mindspore/lite/include/context.h b/mindspore/lite/include/context.h index 22bd24df..7e1e06f3 100644 --- a/mindspore/lite/include/context.h +++ b/mindspore/lite/include/context.h @@ -52,6 +52,10 @@ typedef struct AscendDeviceInfo { } AscendDeviceInfo; typedef struct NNRtDeviceInfo { + uint32_t device_id_ = 0; + int priority_ = 0; + int performance_mode_ = 0; 
+ bool enable_fp16_ = false; } NNRtDeviceInfo; /// \brief DeviceInfo defined for backend's configuration information. diff --git a/mindspore/lite/include/model.h b/mindspore/lite/include/model.h index 44cba37b..a54904c8 100644 --- a/mindspore/lite/include/model.h +++ b/mindspore/lite/include/model.h @@ -24,6 +24,7 @@ namespace mindspore { namespace schema { struct Tensor; } // namespace schema + namespace lite { typedef enum { ModelType_MSLite, ModelType_MindIR } LiteModelType; @@ -61,7 +62,10 @@ struct LiteGraph { bool model_obfuscated_ = false; std::vector deobf_prims_; #endif + + std::string ToString() const; }; + struct Model { LiteGraph graph_; char *buf = nullptr; diff --git a/mindspore/lite/src/runtime/c_api/context_c.cc b/mindspore/lite/src/runtime/c_api/context_c.cc index d030e931..2a0a6d06 100644 --- a/mindspore/lite/src/runtime/c_api/context_c.cc +++ b/mindspore/lite/src/runtime/c_api/context_c.cc @@ -14,8 +14,13 @@ * limitations under the License. */ #include "include/c_api/context_c.h" +#include #include "src/runtime/c_api/context_c.h" +#include "src/runtime/c_api/type_c_private.h" #include "src/common/log_adapter.h" +#ifdef SUPPORT_NNRT +#include "interfaces/kits/c/neural_network_runtime.h" +#endif // ================ Context ================ OH_AI_ContextHandle OH_AI_ContextCreate() { @@ -238,3 +243,194 @@ int OH_AI_DeviceInfoGetFrequency(const OH_AI_DeviceInfoHandle device_info) { // return -1; } } + +NNRTDeviceDesc *OH_AI_GetAllNNRTDeviceDescs(size_t *num) { + if (num == nullptr) { + MS_LOG(ERROR) << "Input num is null"; + return nullptr; + } +#ifdef SUPPORT_NNRT + *num = 0; + + const size_t *all_device_ids; + uint32_t device_count; + auto ret = OH_NNDevice_GetAllDevicesID(&all_device_ids, &device_count); + if ((ret != OH_NN_SUCCESS) || (device_count == 0)) { + MS_LOG(ERROR) << "NNRT get all device id failed, ret: " << ret; + return nullptr; + } + + NNRTDeviceDesc *desc = (NNRTDeviceDesc *)malloc(sizeof(NNRTDeviceDesc) * device_count); + if (desc == nullptr) { + MS_LOG(ERROR) << "NNRT allocate desc failed"; + return nullptr; + } + + for (uint32_t i = 0; i < device_count; i++) { + desc[i].device_id = all_device_ids[i]; + OH_NN_DeviceType type; + (void)OH_NNDevice_GetType(all_device_ids[i], &type); + desc[i].device_type = static_cast(type); + + const char *name = nullptr; + (void)OH_NNDevice_GetName(all_device_ids[i], &name); + desc[i].device_name[127] = '\0'; + strncpy(desc[i].device_name, name, 127); + } + *num = device_count; + return desc; +#else + return nullptr; +#endif +} + +void OH_AI_DestroyAllNNRTDeviceDescs(NNRTDeviceDesc **desc) { + if (desc == nullptr) { + MS_LOG(WARNING) << "desc is null"; + return; + } + free(*desc); + *desc = nullptr; +} + +size_t OH_AI_GetDeviceIdFromNNRTDeviceDesc(const NNRTDeviceDesc *desc) { + if (desc == nullptr) { + MS_LOG(ERROR) << "NNRT desc is null"; + return 0; + } + return desc->device_id; +} + +const char *OH_AI_GetNameFromNNRTDeviceDesc(const NNRTDeviceDesc *desc) { + if (desc == nullptr) { + MS_LOG(ERROR) << "NNRT desc is null"; + return nullptr; + } + return desc->device_name; +} + +OH_AI_NNRTDeviceType OH_AI_GetTypeFromNNRTDeviceDesc(const NNRTDeviceDesc *desc) { + if (desc == nullptr) { + MS_LOG(ERROR) << "NNRT desc is null"; + return OH_AI_NNRTDeviceType::OH_AI_NNRTDEVICE_OTHERS; + } + return desc->device_type; +} + +OH_AI_DeviceInfoHandle OH_AI_CreateNNRTDeviceInfoByName(const char *name) { + size_t num = 0; + NNRTDeviceDesc *desc = OH_AI_GetAllNNRTDeviceDescs(&num); + if (desc == nullptr) { + MS_LOG(ERROR) << "Get all 
device desc failed"; + return nullptr; + } + + OH_AI_DeviceInfoHandle handle = nullptr; + for (size_t i = 0; i < num; i++) { + if (strncmp(desc[i].device_name, name, NNRT_DEVICE_NAME_MAX - 1) == 0) { + handle = OH_AI_DeviceInfoCreate(OH_AI_DEVICETYPE_NNRT); + OH_AI_DeviceInfoSetDeviceId(handle, desc[i].device_id); + break; + } + } + OH_AI_DestroyAllNNRTDeviceDescs(&desc); + return handle; +} + +OH_AI_DeviceInfoHandle OH_AI_CreateNNRTDeviceInfoByType(OH_AI_NNRTDeviceType type) { + size_t num = 0; + NNRTDeviceDesc *desc = OH_AI_GetAllNNRTDeviceDescs(&num); + if (desc == nullptr) { + MS_LOG(ERROR) << "Get all device desc failed"; + return nullptr; + } + + OH_AI_DeviceInfoHandle handle = nullptr; + for (size_t i = 0; i < num; i++) { + if (desc[i].device_type == type) { + handle = OH_AI_DeviceInfoCreate(OH_AI_DEVICETYPE_NNRT); + OH_AI_DeviceInfoSetDeviceId(handle, desc[i].device_id); + break; + } + } + OH_AI_DestroyAllNNRTDeviceDescs(&desc); + return handle; +} + +void OH_AI_DeviceInfoSetDeviceId(OH_AI_DeviceInfoHandle device_info, size_t device_id) { + if (device_info == nullptr) { + MS_LOG(ERROR) << "device info is null"; + return; + } + if (OH_AI_DeviceInfoGetDeviceType(device_info) != OH_AI_DEVICETYPE_NNRT) { + MS_LOG(ERROR) << "Set device_id of non-NNRT device is not allowable, ignored"; + return; + } + auto impl = reinterpret_cast(device_info); + impl->device_id = device_id; +} + +size_t OH_AI_DeviceInfoGetDeviceId(const OH_AI_DeviceInfoHandle device_info) { + if (device_info == nullptr) { + MS_LOG(ERROR) << "device info is null"; + return 0; + } + if (OH_AI_DeviceInfoGetDeviceType(device_info) != OH_AI_DEVICETYPE_NNRT) { + MS_LOG(ERROR) << "Get device_id of non-NNRT device is not allowable, ignored"; + return 0; + } + auto impl = reinterpret_cast(device_info); + return impl->device_id; +} + +void OH_AI_DeviceInfoSetPerformanceMode(OH_AI_DeviceInfoHandle device_info, OH_AI_PerformanceMode mode) { + if (device_info == nullptr) { + MS_LOG(ERROR) << "device info is null"; + return; + } + if (OH_AI_DeviceInfoGetDeviceType(device_info) != OH_AI_DEVICETYPE_NNRT) { + MS_LOG(ERROR) << "Set performance_mode of non-NNRT device is not allowable, ignored"; + return; + } + auto impl = reinterpret_cast(device_info); + impl->performance_mode = mode; +} + +OH_AI_PerformanceMode OH_AI_DeviceInfoGetPerformanceMode(const OH_AI_DeviceInfoHandle device_info) { + if (device_info == nullptr) { + MS_LOG(ERROR) << "device info is null"; + return OH_AI_PERFORMANCE_NONE; + } + if (OH_AI_DeviceInfoGetDeviceType(device_info) != OH_AI_DEVICETYPE_NNRT) { + MS_LOG(ERROR) << "Get performance_mode of non-NNRT device is not allowable, ignored"; + return OH_AI_PERFORMANCE_NONE; + } + auto impl = reinterpret_cast(device_info); + return impl->performance_mode; +} + +void OH_AI_DeviceInfoSetPriority(OH_AI_DeviceInfoHandle device_info, OH_AI_Priority priority) { + if (device_info == nullptr) { + MS_LOG(ERROR) << "device info is null"; + return; + } + if (OH_AI_DeviceInfoGetDeviceType(device_info) != OH_AI_DEVICETYPE_NNRT) { + MS_LOG(ERROR) << "Set priority of non-NNRT device is not allowable, ignored"; + return; + } + auto impl = reinterpret_cast(device_info); + impl->priority = priority; +} + +OH_AI_Priority OH_AI_DeviceInfoGetPriority(const OH_AI_DeviceInfoHandle device_info) { + if (device_info == nullptr) { + MS_LOG(ERROR) << "device info is null"; + return OH_AI_PRIORITY_NONE; + } + if (OH_AI_DeviceInfoGetDeviceType(device_info) != OH_AI_DEVICETYPE_NNRT) { + MS_LOG(ERROR) << "Get priority of non-NNRT device is not 
allowable, ignored"; + return OH_AI_PRIORITY_NONE; + } + auto impl = reinterpret_cast(device_info); + return impl->priority; +} diff --git a/mindspore/lite/src/runtime/c_api/context_c.h b/mindspore/lite/src/runtime/c_api/context_c.h index 7b9db3ea..0fb2f3e7 100644 --- a/mindspore/lite/src/runtime/c_api/context_c.h +++ b/mindspore/lite/src/runtime/c_api/context_c.h @@ -29,6 +29,9 @@ typedef struct DeviceInfoC { OH_AI_DeviceType device_type; bool enable_fp16 = false; int frequency = 3; + size_t device_id = 0; + OH_AI_PerformanceMode performance_mode = OH_AI_PERFORMANCE_NONE; + OH_AI_Priority priority = OH_AI_PRIORITY_NONE; std::string provider; std::string provider_device; std::shared_ptr allocator = nullptr; diff --git a/mindspore/lite/src/runtime/c_api/type_c_private.h b/mindspore/lite/src/runtime/c_api/type_c_private.h new file mode 100644 index 00000000..b3b64748 --- /dev/null +++ b/mindspore/lite/src/runtime/c_api/type_c_private.h @@ -0,0 +1,40 @@ +/** + * Copyright 2023 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_LITE_SRC_RUNTIME_C_API_TYPE_C_PRIVATE_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_C_API_TYPE_C_PRIVATE_H_ + +#include +#include +#include +#include +#include "include/c_api/types_c.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define NNRT_DEVICE_NAME_MAX (128) + +struct NNRTDeviceDesc { + size_t device_id; + OH_AI_NNRTDeviceType device_type; + char device_name[NNRT_DEVICE_NAME_MAX]; +}; + +#ifdef __cplusplus +} +#endif +#endif // MINDSPORE_LITE_SRC_RUNTIME_C_API_TYPE_C_PRIVATE_H_ diff --git a/mindspore/lite/src/runtime/cxx_api/context.cc b/mindspore/lite/src/runtime/cxx_api/context.cc index d550975b..6ac926b9 100644 --- a/mindspore/lite/src/runtime/cxx_api/context.cc +++ b/mindspore/lite/src/runtime/cxx_api/context.cc @@ -48,6 +48,10 @@ constexpr auto KModelOptionAscendFusionSwitchCfgPath = "mindspore.option.ascend. 
constexpr auto kModelOptionAscendDynamicBatchSize = "mindspore.option.ascend.dynamic_batch_size"; constexpr auto kModelOptionAscendDynamicImageSize = "mindspore.option.ascend.dynamic_image_size"; constexpr auto kModelOptionAscendBufferOptimize = "mindspore.option.ascend.buffer_optimize"; +constexpr auto kModelOptionNNRTDeviceID = "mindspore.option.nnrt.device_id"; +constexpr auto kModelOptionNNRTPerformanceMode = "mindspore.option.nnrt.performance_mode"; +constexpr auto kModelOptionNNRTPriority = "mindspore.option.nnrt.priority"; +constexpr auto kModelOptionNNRTEnableFP16 = "mindspore.option.nnrt.enable_fp16"; Context::Context() : data_(std::make_shared()) {} @@ -586,4 +590,68 @@ std::vector AscendDeviceInfo::GetBufferOptimizeModeChar() const { const std::string &ref = GetValue(data_, kModelOptionAscendBufferOptimize); return StringToChar(ref); } + +void NNRTDeviceInfo::SetDeviceID(size_t device_id) { + if (data_ == nullptr) { + MS_LOG(ERROR) << "Invalid context."; + return; + } + data_->params[kModelOptionNNRTDeviceID] = device_id; +} + +size_t NNRTDeviceInfo::GetDeviceID() const { + if (data_ == nullptr) { + MS_LOG(ERROR) << "Invalid context."; + return 0; + } + return GetValue(data_, kModelOptionNNRTDeviceID); +} + +void NNRTDeviceInfo::SetPerformanceMode(int performance_mode) { + if (data_ == nullptr) { + MS_LOG(ERROR) << "Invalid context."; + return; + } + data_->params[kModelOptionNNRTPerformanceMode] = performance_mode; +} + +int NNRTDeviceInfo::GetPerformanceMode() const { + if (data_ == nullptr) { + MS_LOG(ERROR) << "Invalid context."; + return 0; + } + return GetValue(data_, kModelOptionNNRTPerformanceMode); +} + +void NNRTDeviceInfo::SetPriority(int priority) { + if (data_ == nullptr) { + MS_LOG(ERROR) << "Invalid context."; + return; + } + data_->params[kModelOptionNNRTPriority] = priority; +} + +int NNRTDeviceInfo::GetPriority() const { + if (data_ == nullptr) { + MS_LOG(ERROR) << "Invalid context."; + return 0; + } + return GetValue(data_, kModelOptionNNRTPriority); +} + +void NNRTDeviceInfo::SetEnableFP16(bool is_fp16) { + if (data_ == nullptr) { + MS_LOG(ERROR) << "Invalid context."; + return; + } + data_->params[kModelOptionNNRTEnableFP16] = is_fp16; +} + +bool NNRTDeviceInfo::GetEnableFP16() const { + if (data_ == nullptr) { + MS_LOG(ERROR) << "Invalid context."; + return false; + } + return GetValue(data_, kModelOptionNNRTEnableFP16); +} } // namespace mindspore diff --git a/mindspore/lite/src/runtime/cxx_api/converters.cc b/mindspore/lite/src/runtime/cxx_api/converters.cc index 23a02778..5f2bd40f 100644 --- a/mindspore/lite/src/runtime/cxx_api/converters.cc +++ b/mindspore/lite/src/runtime/cxx_api/converters.cc @@ -72,8 +72,13 @@ Status ContextUtils::AddAscendDevice(lite::InnerContext *inner_context, DeviceIn return kSuccess; } -Status ContextUtils::AddNNRtDevice(lite::InnerContext *inner_context) { +Status ContextUtils::AddNNRtDevice(lite::InnerContext *inner_context, size_t device_id, int performance_mode, + int priority, bool enable_fp16) { lite::DeviceInfo device_info = {0}; + device_info.nnrt_device_info_.device_id_ = device_id; + device_info.nnrt_device_info_.performance_mode_ = performance_mode; + device_info.nnrt_device_info_.priority_ = priority; + device_info.nnrt_device_info_.enable_fp16_ = enable_fp16; inner_context->device_list_.push_back({lite::DT_NNRT, device_info}); return kSuccess; } @@ -122,7 +127,10 @@ lite::InnerContext *ContextUtils::Convert(Context *context) { } else if (device->GetDeviceType() == kAscend) { ret = 
AddAscendDevice(inner_context.get(), device.get()); } else if (device->GetDeviceType() == kNNRt) { - ret = AddNNRtDevice(inner_context.get()); + auto nnrt_device_info = device->Cast(); + ret = AddNNRtDevice(inner_context.get(), nnrt_device_info->GetDeviceID(), + nnrt_device_info->GetPerformanceMode(), nnrt_device_info->GetPriority(), + nnrt_device_info->GetEnableFP16()); } if (ret != kSuccess) { MS_LOG(ERROR) << "Add device failed!"; @@ -162,7 +170,8 @@ lite::InnerContext *ContextUtils::Convert(const ContextC *context_c) { } else if (device_info_c->device_type == OH_AI_DEVICETYPE_KIRIN_NPU) { ret = AddNpuDevice(device_info_c->frequency, inner_context.get()); } else if (device_info_c->device_type == OH_AI_DEVICETYPE_NNRT) { - ret = AddNNRtDevice(inner_context.get()); + ret = AddNNRtDevice(inner_context.get(), device_info_c->device_id, device_info_c->performance_mode, + device_info_c->priority, device_info_c->enable_fp16); } if (ret != kSuccess) { MS_LOG(ERROR) << "Add device failed!"; diff --git a/mindspore/lite/src/runtime/cxx_api/converters.h b/mindspore/lite/src/runtime/cxx_api/converters.h index 11338875..bd7daabb 100644 --- a/mindspore/lite/src/runtime/cxx_api/converters.h +++ b/mindspore/lite/src/runtime/cxx_api/converters.h @@ -45,7 +45,7 @@ class ContextUtils { lite::InnerContext *inner_context); static Status AddNpuDevice(int frequency, lite::InnerContext *inner_context); static Status AddAscendDevice(lite::InnerContext *inner_context, DeviceInfoContext *device); - static Status AddNNRtDevice(lite::InnerContext *inner_context); + static Status AddNNRtDevice(lite::InnerContext *inner_context, size_t device_id, int performance_mode, int priority, bool enable_fp16); static bool IsAffinityModeValid(int affinity_mode) { return affinity_mode >= lite::NO_BIND && affinity_mode <= lite::MID_CPU; } diff --git a/mindspore/lite/src/runtime/delegate/nnrt/nnrt_delegate.cc b/mindspore/lite/src/runtime/delegate/nnrt/nnrt_delegate.cc index 67d4e6c4..8b6e5ba4 100644 --- a/mindspore/lite/src/runtime/delegate/nnrt/nnrt_delegate.cc +++ b/mindspore/lite/src/runtime/delegate/nnrt/nnrt_delegate.cc @@ -13,6 +13,9 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + +#include +#include #include "nnrt_delegate.h" #include "checker/primitive_check.h" #include "src/common/log_adapter.h" @@ -21,139 +24,432 @@ #include "nnrt_model_kernel.h" #include "schema/model_generated.h" #include "flatbuffers/flatbuffers.h" +#include "runtime/tensor_category.h" -mindspore::Status mindspore::NNRTDelegate::Build(DelegateModel *model) { +namespace mindspore { +namespace lite { +Status NNRTDelegate::Build(DelegateModel *model) { MS_LOG(DEBUG) << "Start to build NNRT model."; - if (this->nnrt_lite_graph == nullptr) { - MS_LOG(ERROR) << "nnrt_lite_graph is nullptr."; - return mindspore::kLiteError; + if ((lite_graph_ == nullptr) || (lite_graph_->sub_graphs_.size() > 1)) { + MS_LOG(WARNING) << "LiteGraph contains more than one subgraph. 
NNRT does not support control-flow model yet, fallback to CPU"; + return kSuccess; } - if (this->nnrt_lite_graph->sub_graphs_.empty()) { - // must have at lease one subgraph - MS_LOG(ERROR) << "must have at lease one subgraph"; - return mindspore::kLiteError; + + OH_NNModel *full_model = BuildFullNNModel(); + if (full_model == nullptr) { + MS_LOG(WARNING) << "Build full NNModel failed, fallback to CPU"; + return kSuccess; } - OH_NN_ReturnCode ret_code; - OH_NNModel *oh_nnmodel = OH_NNModel_Construct(); - if (oh_nnmodel == nullptr) { - MS_LOG(ERROR) << "Construct NNModel failed, oh_nnmodel is nullptr."; - return mindspore::kLiteError; + std::vector op_supports = QueryOpSupports(full_model); + if (op_supports.empty()) { + MS_LOG(WARNING) << "Query no op supports for full model, fallback to CPU"; + OH_NNModel_Destroy(&full_model); + return kSuccess; } + auto nnrt_subgraph_ranges = GetNNRTSubgraphRanges(model, op_supports); + MS_LOG(INFO) << "Found NNRT subgraph count: " << nnrt_subgraph_ranges.size(); - ret_code = OH_NNModel_BuildFromLiteGraph(oh_nnmodel, this->nnrt_lite_graph); - if (ret_code != OH_NN_SUCCESS) { - MS_LOG(ERROR) << "Build NNModel failed, OH_NN_ReturnCode = " << ret_code; - OH_NNModel_Destroy(&oh_nnmodel); - return mindspore::kLiteError; + std::vector sub_lite_graphs; + auto ret = CreateLiteGraphForNNRTSubgraph(nnrt_subgraph_ranges, &sub_lite_graphs); + if (ret != kSuccess) { + OH_NNModel_Destroy(&full_model); + MS_LOG(WARNING) << "Create NNRT sub LiteGraph failed, fallback to CPU"; + return kSuccess; } - MS_LOG(DEBUG) << "NNRTDelegate creates NNModel success."; - OH_NNCompilation *oh_nn_compilation = nullptr; - oh_nn_compilation = OH_NNCompilation_Construct(oh_nnmodel); + std::vector nnrt_subgraph_kernels; + ret = CreateNNRTSubgraphKernels(model, sub_lite_graphs, nnrt_subgraph_ranges, &nnrt_subgraph_kernels); + if (ret != kSuccess) { + OH_NNModel_Destroy(&full_model); + MS_LOG(WARNING) << "Create NNRT subgraph kernel failed, fallback to CPU"; + return kSuccess; + } + + ReplaceNNRTKernelsInDelegateModel(model, nnrt_subgraph_ranges, nnrt_subgraph_kernels); + OH_NNModel_Destroy(&full_model); + MS_LOG(INFO) << "NNRTDelegate build success."; + return kSuccess; +} + +OH_NNModel *NNRTDelegate::BuildFullNNModel() { + if (lite_graph_ == nullptr) { + MS_LOG(ERROR) << "Lite graph is null"; + return nullptr; + } - if (oh_nn_compilation == nullptr) { - MS_LOG(ERROR) << "Construct NNCompilation failed"; - OH_NNModel_Destroy(&oh_nnmodel); - return mindspore::kLiteError; + if (lite_graph_->sub_graphs_.empty()) { + MS_LOG(ERROR) << "Lite graph must have at lease one subgraph"; + return nullptr; } - MS_LOG(DEBUG) << "NNRTDelegate creates NNCompilation success."; - const size_t *allDevicesID = nullptr; - uint32_t device_count = 0; - ret_code = OH_NNDevice_GetAllDevicesID(&allDevicesID, &device_count); - if (ret_code != OH_NN_SUCCESS) { - MS_LOG(ERROR) << "NNModel GetAllDevicesID failed, OH_NN_ReturnCode = " << ret_code; - OH_NNCompilation_Destroy(&oh_nn_compilation); - OH_NNModel_Destroy(&oh_nnmodel); - return mindspore::kLiteError; + OH_NNModel *nn_model = OH_NNModel_Construct(); + if (nn_model == nullptr) { + MS_LOG(ERROR) << "Create NNModel failed, result is nullptr"; + return nullptr; } - if (device_count <= 0) { - MS_LOG(WARNING) << "No NNRt Device found, fall back to CPU. 
"; - // OH_NNCompilation_Destroy(&oh_nn_compilation); - // OH_NNModel_Destroy(&oh_nnmodel); - return mindspore::kSuccess; + auto ret = OH_NNModel_BuildFromLiteGraph(nn_model, lite_graph_); + if (ret != OH_NN_SUCCESS) { + MS_LOG(ERROR) << "Build NNModel failed, ret: " << ret; + OH_NNModel_Destroy(&nn_model); + return nullptr; } - MS_LOG(DEBUG) << "NNRTDelegate GetAllDevicesID success. device_count: " << device_count; + return nn_model; +} - // check if model ops are supported - const bool *issupported = nullptr; +std::vector NNRTDelegate::QueryOpSupports(OH_NNModel *nn_model) { + const bool *is_supported = nullptr; // Note: this memory is owned by nn_model, don't free alone. uint32_t op_count = 0; - ret_code = OH_NNModel_GetAvailableOperations(oh_nnmodel, allDevicesID[0], &issupported, &op_count); - if (ret_code != OH_NN_SUCCESS) { - MS_LOG(ERROR) << "NNModel GetAvailableOperations failed, OH_NN_ReturnCode = " << ret_code - << ", maybe due to dataParcel data length limitaion. Fall back to CPU."; - OH_NNCompilation_Destroy(&oh_nn_compilation); - OH_NNModel_Destroy(&oh_nnmodel); - return mindspore::kSuccess; + auto ret = OH_NNModel_GetAvailableOperations(nn_model, nnrt_device_info_.device_id_, &is_supported, &op_count); + if (ret != OH_NN_SUCCESS) { + MS_LOG(WARNING) << "NNModel GetAvailableOperations failed, ret: " << ret + << ", maybe caused by dataParcel data length limitation"; + return {}; } - uint32_t supported_op_count = 0; - for (uint32_t i = 0; i < op_count; i++) { - if (issupported[i]) { - supported_op_count++; + std::vector op_supports(is_supported, is_supported + op_count); + return op_supports; +} + +/* Find continuous sub-sequence in op_supports. */ +std::vector NNRTDelegate::GetNNRTSubgraphRanges(DelegateModel *model, + const std::vector &op_supports) { + std::vector nnrt_subgraph_ranges; + NNRTOpRange op_range; + bool start_count = false; + for (size_t i = 0; i < op_supports.size(); i++) { + if (op_supports[i]) { + if (start_count == false) { + start_count = true; + op_range.begin_index_ = i; + op_range.begin_iter_ = model->BeginKernelIterator() + i; + } + } else { + if (start_count == true) { + start_count = false; + op_range.end_index_ = i; + op_range.end_iter_ = model->BeginKernelIterator() + i; + nnrt_subgraph_ranges.push_back(op_range); + } } } - if (op_count != supported_op_count) { - MS_LOG(WARNING) << "this model has " << op_count << "ops, but NNRT only support " << supported_op_count - << " ops, fall back to CPU."; - // must support all op, else fall back to CPU - OH_NNCompilation_Destroy(&oh_nn_compilation); - OH_NNModel_Destroy(&oh_nnmodel); - return mindspore::kSuccess; + // handle last true subsequence + if (start_count == true) { + op_range.end_index_ = op_supports.size(); + op_range.end_iter_ = model->EndKernelIterator(); + nnrt_subgraph_ranges.push_back(op_range); + MS_LOG(INFO) << "Schedule NNRT subgraph range: [" << op_range.begin_index_ << ", " << op_range.end_index_ << ")"; } - MS_LOG(DEBUG) << "NNRtDelegate supports all op in this model."; + return nnrt_subgraph_ranges; +} - ret_code = OH_NNCompilation_SetDevice(oh_nn_compilation, allDevicesID[0]); +/** + * This method ONLY works when the follow pre-conditions are satisfied: + * 1. The node order of lite_graph_->all_nodes should be consistent with DelegateModel sequence. + * This ensures the kernel replacement in DelegateModel based on the re-organizing info from lite_graph_ is correct. + * 2. The node indices of lite_graph_->sub_graphs[0].node_indices should be monotonically increasing from 0 to size - 1. 
+ */ +Status NNRTDelegate::CreateLiteGraphForNNRTSubgraph( + const std::vector &nnrt_op_ranges, + std::vector *sub_lite_graphs) { + MS_LOG(INFO) << "Start creating LiteGraph for NNRT subgraph"; + for (const auto &op_range: nnrt_op_ranges) { + MS_LOG(INFO) << "Process op range: [" << op_range.begin_index_ << ", " << op_range.end_index_ << ")"; + LiteGraph *sub_lite_graph = new (std::nothrow)LiteGraph; + if (sub_lite_graph == nullptr) { + MS_LOG(ERROR) << "Allocate LiteGraph failed"; + return kLiteError; + } + sub_lite_graph->name_ = lite_graph_->name_; + sub_lite_graph->version_ = lite_graph_->version_; - if (ret_code != OH_NN_SUCCESS) { - MS_LOG(ERROR) << "NNCompilation SetDevice failed, OH_NN_ReturnCode = " << ret_code; - OH_NNCompilation_Destroy(&oh_nn_compilation); - OH_NNModel_Destroy(&oh_nnmodel); - return mindspore::kLiteError; + auto sub_graph = new (std::nothrow)LiteGraph::SubGraph; + if (sub_graph == nullptr) { + MS_LOG(ERROR) << "Allocate SubGraph failed"; + return kLiteError; + } + sub_graph->name_ = lite_graph_->name_; + sub_lite_graph->sub_graphs_.push_back(sub_graph); + + // deal with all_nodes + MS_LOG(INFO) << "Assemble all_nodes..."; + int new_node_index = 0; + std::map in_tensor_index_map; + std::map out_tensor_index_map; + for (size_t index = op_range.begin_index_; index < op_range.end_index_; index++) { + LiteGraph::Node *node = new (std::nothrow)LiteGraph::Node; + if (node == nullptr) { + MS_LOG(ERROR) << "Allocate Node failed"; + return kLiteError; + } + *node = *lite_graph_->all_nodes_[index]; + sub_lite_graph->all_nodes_.push_back(node); + sub_graph->node_indices_.push_back(new_node_index++); + + for (auto i: node->input_indices_) { + in_tensor_index_map.emplace(i, lite_graph_->all_tensors_[i]); + } + for (auto i: node->output_indices_) { + out_tensor_index_map.emplace(i, lite_graph_->all_tensors_[i]); + } + } + + // deal with all_tensors + MS_LOG(INFO) << "Assemble all_tensors..."; + std::set tensors; + for (auto iter: in_tensor_index_map) { + tensors.emplace(iter.second); + } + for (auto iter: out_tensor_index_map) { + tensors.emplace(iter.second); + } + + uint32_t new_index = 0; + std::map new_tensor_maps; + for (auto tensor: tensors) { + new_tensor_maps.emplace(tensor, new_index++); + } + + sub_lite_graph->all_tensors_ = std::vector(tensors.begin(), tensors.end()); + + // deal with every node's input/output indices + MS_LOG(INFO) << "Set input/output indices of each node..."; + for (auto node: sub_lite_graph->all_nodes_) { + for (auto &index : node->input_indices_) { + index = new_tensor_maps.at(in_tensor_index_map.at(index)); + } + for (auto &index : node->output_indices_) { + index = new_tensor_maps.at(out_tensor_index_map.at(index)); + } + } + + // deal with subgraph's input/output indices + MS_LOG(INFO) << "Set input/output indices of each subgraph..."; + sub_graph->tensor_indices_ = std::vector(tensors.size()); + std::iota(sub_graph->tensor_indices_.begin(), sub_graph->tensor_indices_.end(), 0U); + + for (auto iter: in_tensor_index_map) { + auto new_tensor_index = new_tensor_maps[iter.second]; + MS_LOG(DEBUG) << "handle input: old: " << iter.first << ", new: " << new_tensor_index << std::endl; + if (IsConstTensor(*iter.second)) { + MS_LOG(DEBUG) << "- tensor: " << new_tensor_index << " is const." 
<< std::endl; + continue; + } + + bool is_subgraph_input = true; + for (auto node: sub_lite_graph->all_nodes_) { + if (std::find(node->output_indices_.begin(), node->output_indices_.end(), new_tensor_index) != + node->output_indices_.end()) { + is_subgraph_input = false; + MS_LOG(DEBUG) << "- tensor: " << new_tensor_index << " is not subgraph input." << std::endl; + break; + } + } + if (is_subgraph_input) { + sub_graph->input_indices_.push_back(new_tensor_index); + MS_LOG(DEBUG) << "- select tensor: " << new_tensor_index << " as subgraph input." << std::endl; + } + } + + for (auto iter: out_tensor_index_map) { + int new_tensor_index = new_tensor_maps.at(iter.second); + MS_LOG(DEBUG) << "handle output: old: " << iter.first << ", new: " << new_tensor_index << std::endl; + if (IsConstTensor(*iter.second)) { + MS_LOG(DEBUG) << "- tensor: " << new_tensor_index << " is const." << std::endl; + continue; + } + + bool is_subgraph_output = false; + for (size_t i = 0; i < lite_graph_->all_nodes_.size(); i++) { + if ((i >= op_range.begin_index_) && (i < op_range.end_index_)) { + continue; + } + auto node = lite_graph_->all_nodes_[i]; + if (std::find(node->input_indices_.begin(), node->input_indices_.end(), iter.first) != + node->input_indices_.end()) { // As the input of node which does not belong to the subgraph. + is_subgraph_output = true; + MS_LOG(DEBUG) << "- tensor: " << new_tensor_index << " is original subgraph output. node: " << node->primitive_ << std::endl; + break; + } + } + bool is_graph_output = (std::find(lite_graph_->output_indices_.begin(),lite_graph_->output_indices_.end(), + iter.first) != lite_graph_->output_indices_.end()); + if (is_graph_output) { + MS_LOG(DEBUG) << "- tensor: " << new_tensor_index << " is graph output." << std::endl; + } + if (is_subgraph_output || is_graph_output) { + sub_graph->output_indices_.push_back(new_tensor_index); + MS_LOG(DEBUG) << "- select tensor: " << new_tensor_index << " as subgraph output." << std::endl; + } + } + + // deal with full-graph's input/output indices + sub_lite_graph->input_indices_ = sub_graph->input_indices_; + sub_lite_graph->output_indices_ = sub_graph->output_indices_; + sub_lite_graphs->push_back(sub_lite_graph); } + MS_LOG(INFO) << "Finished creating LiteGraph for NNRT subgraph"; + return kSuccess; +} - ret_code = OH_NNCompilation_Build(oh_nn_compilation); +struct TensorLocation { + uint32_t node_index; // the index of node which the tensor belongs to. + uint32_t tensor_index; // the index of node in/out tensors which the tensor is located at. 
+}; +Status NNRTDelegate::InitNNCompilation(OH_NNCompilation *nn_compilation) const { + auto ret_code = OH_NNCompilation_SetDevice(nn_compilation, nnrt_device_info_.device_id_); if (ret_code != OH_NN_SUCCESS) { - MS_LOG(ERROR) << "Build NNCompilation failed, OH_NN_ReturnCode = " << ret_code; - OH_NNCompilation_Destroy(&oh_nn_compilation); - OH_NNModel_Destroy(&oh_nnmodel); - return mindspore::kLiteError; + MS_LOG(ERROR) << "NNCompilation set device id failed, ret: " << ret_code; + return kLiteError; + } + ret_code = OH_NNCompilation_SetPerformanceMode(nn_compilation, + (OH_NN_PerformanceMode)(nnrt_device_info_.performance_mode_)); + if ((ret_code != OH_NN_SUCCESS) && (ret_code != OH_NN_OPERATION_FORBIDDEN)) { + MS_LOG(ERROR) << "NNCompilation set performance mode failed, ret: " << ret_code; + return kLiteError; + } + ret_code = OH_NNCompilation_SetPriority(nn_compilation, (OH_NN_Priority)(nnrt_device_info_.priority_)); + if ((ret_code != OH_NN_SUCCESS) && (ret_code != OH_NN_OPERATION_FORBIDDEN)) { + MS_LOG(ERROR) << "NNCompilation set priority failed, ret: " << ret_code; + return kLiteError; + } + ret_code = OH_NNCompilation_EnableFloat16(nn_compilation, nnrt_device_info_.enable_fp16_); + if ((ret_code != OH_NN_SUCCESS) && (ret_code != OH_NN_OPERATION_FORBIDDEN)) { + MS_LOG(ERROR) << "NNCompilation enable fp16 failed, ret: " << ret_code; + return kLiteError; } - MS_LOG(DEBUG) << "NNRTDelegate SetDevice success."; - - OH_NNExecutor *oh_nn_executor = nullptr; - oh_nn_executor = OH_NNExecutor_Construct(oh_nn_compilation); - if (oh_nn_executor == nullptr) { - MS_LOG(ERROR) << "Construct NNCompilation SetDevice failed, OH_NN_ReturnCode = " << ret_code; - OH_NNCompilation_Destroy(&oh_nn_compilation); - OH_NNModel_Destroy(&oh_nnmodel); - return mindspore::kLiteError; + ret_code = OH_NNCompilation_Build(nn_compilation); + if (ret_code != OH_NN_SUCCESS) { + MS_LOG(ERROR) << "Build NNCompilation failed, ret: " << ret_code; + return kLiteError; } - MS_LOG(DEBUG) << "NNRTDelegate creates NNExecutor success."; - mindspore::Status prepare_data_ret; - auto nnr_model_kernel = new (std::nothrow) NNRTModelKernel(oh_nn_executor, model->inputs(), model->outputs()); - if (nnr_model_kernel == nullptr) { - MS_LOG(ERROR) << "new NNRTModelKernel failed"; - return mindspore::kLiteError; + return kSuccess; +} + +Status NNRTDelegate::CreateNNRTSubgraphKernels(DelegateModel *model, + const std::vector &sub_lite_graphs, const std::vector &nnrt_subgraph_ranges, + std::vector *nnrt_subgraph_kernels) { + for (size_t i = 0; i < sub_lite_graphs.size(); i++) { + auto sub_lite_graph = sub_lite_graphs[i]; + + OH_NNModel *nn_model = OH_NNModel_Construct(); + auto ret = OH_NNModel_BuildFromLiteGraph(nn_model, sub_lite_graph); + if (ret != OH_NN_SUCCESS) { + MS_LOG(ERROR) << "Build NNModel failed, ret: " << ret; + OH_NNModel_Destroy(&nn_model); + return kLiteError; + } + + OH_NNCompilation *nn_compilation = OH_NNCompilation_Construct(nn_model); + if (nn_compilation == nullptr) { + MS_LOG(ERROR) << "Construct NNCompilation failed"; + OH_NNModel_Destroy(&nn_model); + return kLiteError; + } + MS_LOG(DEBUG) << "NNRTDelegate creates NNCompilation success."; + + auto ret_code = InitNNCompilation(nn_compilation); + if (ret_code != kSuccess) { + MS_LOG(ERROR) << "Init NNCompilation failed"; + OH_NNCompilation_Destroy(&nn_compilation); + OH_NNModel_Destroy(&nn_model); + return kLiteError; + } + + OH_NNExecutor *nn_executor = nullptr; + nn_executor = OH_NNExecutor_Construct(nn_compilation); + if (nn_executor == nullptr) { + MS_LOG(ERROR) 
<< "Construct NNExecutor failed, ret: " << ret_code; + OH_NNCompilation_Destroy(&nn_compilation); + OH_NNModel_Destroy(&nn_model); + return kLiteError; + } + MS_LOG(DEBUG) << "NNRTDelegate creates NNExecutor success."; + + bool format_not_support = false; + std::vector in_tensors; + for (auto index: sub_lite_graph->sub_graphs_[0]->input_indices_) { + TensorLocation location; + for (auto node_index: sub_lite_graph->sub_graphs_[0]->node_indices_) { + auto node = sub_lite_graph->all_nodes_[node_index]; + auto iter = std::find(node->input_indices_.begin(), node->input_indices_.end(), index); + if (iter != node->input_indices_.end()) { + uint32_t tensor_index = iter - node->input_indices_.begin(); + location.node_index = node_index; + location.tensor_index = tensor_index; + MS_LOG(INFO) << "Found graph input index: " << index << " is the " << tensor_index << "th input of the node " << node->primitive_; + break; + } + } + KernelIter kernel_iter = nnrt_subgraph_ranges[i].begin_iter_ + location.node_index; + in_tensors.push_back((*kernel_iter)->inputs()[location.tensor_index]); + if (in_tensors.back().format() != Format::NHWC) { + format_not_support = true; + break ; + } + } + + std::vector out_tensors; + for (auto index: sub_lite_graph->sub_graphs_[0]->output_indices_) { + TensorLocation location; + for (auto node_index: sub_lite_graph->sub_graphs_[0]->node_indices_) { + auto node = sub_lite_graph->all_nodes_[node_index]; + auto iter = std::find(node->output_indices_.begin(), node->output_indices_.end(), index); + if (iter != node->output_indices_.end()) { + uint32_t tensor_index = iter - node->output_indices_.begin(); + location.node_index = node_index; + location.tensor_index = tensor_index; + MS_LOG(INFO) << "Found graph output index: " << index << " is the " << tensor_index << "th output of the node " << node->primitive_; + break; + } + } + KernelIter kernel_iter = nnrt_subgraph_ranges[i].begin_iter_ + location.node_index; + out_tensors.push_back((*kernel_iter)->outputs()[location.tensor_index]); + if (out_tensors.back().format() != Format::NHWC) { + format_not_support = true; + break ; + } + } + if (format_not_support) { + MS_LOG(WARNING) << "Not support in/out tensor format, skip this subgraph"; + OH_NNCompilation_Destroy(&nn_compilation); + OH_NNModel_Destroy(&nn_model); + nnrt_subgraph_kernels->push_back(nullptr); + continue ; + } + + auto nnrt_model_kernel = new (std::nothrow)NNRTModelKernel(nn_executor, in_tensors, out_tensors); + if (nnrt_model_kernel == nullptr) { + MS_LOG(ERROR) << "new NNRTModelKernel failed"; + return kLiteError; + } + OH_NNCompilation_Destroy(&nn_compilation); + OH_NNModel_Destroy(&nn_model); + nnrt_subgraph_kernels->push_back(nnrt_model_kernel); } - OH_NNCompilation_Destroy(&oh_nn_compilation); - OH_NNModel_Destroy(&oh_nnmodel); - KernelIter from = model->BeginKernelIterator(); - KernelIter end = model->EndKernelIterator(); - model->Replace(from, end, nnr_model_kernel); - - MS_LOG(DEBUG) << "NNRTDelegate build success."; - return mindspore::kSuccess; + return kSuccess; } -mindspore::Status mindspore::NNRTDelegate::Init() { - MS_LOG(DEBUG) << "NNRTDelegate init success."; - return mindspore::kSuccess; +void NNRTDelegate::ReplaceNNRTKernelsInDelegateModel(DelegateModel *model, + const std::vector &nnrt_subgraph_ranges, + const std::vector &nnrt_subgraph_kernels) { + // Here we perform the replacement from back to front intentionally! If replace from front to end, the kernel + // sequence would shrink and the later begin_iter_/end_iter_ may be erased already. 
+ for (int i = nnrt_subgraph_ranges.size() - 1; i >= 0; i--) { + if (nnrt_subgraph_kernels[i] == nullptr) { + continue; + } + auto from = nnrt_subgraph_ranges[i].begin_iter_; + auto end = nnrt_subgraph_ranges[i].end_iter_; + (void)model->Replace(from, end, nnrt_subgraph_kernels[i]); + MS_LOG(INFO) << "Replace nnrt subgraph kernel in range: [" << (from - model->BeginKernelIterator()) + << ", " << (end - model->BeginKernelIterator()) << ")"; + } } -mindspore::Status mindspore::NNRTDelegate::PrepareInputs(DelegateModel *model, - OH_NNExecutor *oh_nn_executor) { + +Status NNRTDelegate::PrepareInputs(DelegateModel *model, + OH_NNExecutor *oh_nn_executor) { auto input_tensors = model->inputs(); for (size_t i = 0; i < input_tensors.size(); i++) { auto tensor = input_tensors[i]; @@ -164,10 +460,10 @@ mindspore::Status mindspore::NNRTDelegate::PrepareInputs(DelegateModel scale; std::vector zero_point; if (!tmp_quant_param.empty()) { - quant_param = new (std::nothrow) OH_NN_QuantParam; + quant_param = new(std::nothrow) OH_NN_QuantParam; if (quant_param == nullptr) { MS_LOG(ERROR) << "new OH_NN_QuantParam failed."; - return mindspore::kLiteError; + return kLiteError; } for (auto qparam : tmp_quant_param) { bit_num.emplace_back(qparam.bit_num); @@ -179,12 +475,12 @@ mindspore::Status mindspore::NNRTDelegate::PrepareInputs(DelegateModelscale = scale.data(); quant_param->zeroPoint = zero_point.data(); } - auto oprend = new (std::nothrow) OH_NN_Tensor; + auto oprend = new(std::nothrow) OH_NN_Tensor; if (oprend == nullptr) { MS_LOG(ERROR) << "new OH_NN_Tensor Failed"; - return mindspore::kLiteError; + return kLiteError; } - oprend->dataType = ConvertDataType(tensor.DataType()); + oprend->dataType = CastToNNRTDataType(tensor.DataType()); oprend->dimensionCount = tensor_shape.size(); std::vector dimensions_list; @@ -194,14 +490,14 @@ mindspore::Status mindspore::NNRTDelegate::PrepareInputs(DelegateModel subgraph_list; for (auto subgraph : lite_graph.sub_graphs_) { - auto new_subgraph = new (std::nothrow) LiteGraph::SubGraph; + auto new_subgraph = new(std::nothrow) LiteGraph::SubGraph; if (new_subgraph == nullptr) { MS_LOG(ERROR) << "new LiteGraph::Subgraph failed."; return; @@ -334,30 +597,32 @@ void mindspore::NNRTDelegate::ShallowCopyLiteGraph(const mindspore::lite::LiteGr } for (auto tensor : lite_graph.all_tensors_) { ret = lite::CheckTensorSupported(static_cast(tensor)); - if (ret == mindspore::kLiteError) { + if (ret == kLiteError) { MS_LOG(ERROR) << "tensor supported check failed."; return; } } - nnrt_lite_graph = new (std::nothrow) lite::LiteGraph(); - if (nnrt_lite_graph == nullptr) { + lite_graph_ = new(std::nothrow) lite::LiteGraph(); + if (lite_graph_ == nullptr) { MS_LOG(ERROR) << "new LiteGraph failed."; return; } - nnrt_lite_graph->name_ = lite_graph.name_; - nnrt_lite_graph->version_ = lite_graph.version_; - nnrt_lite_graph->input_indices_ = lite_graph.input_indices_; - nnrt_lite_graph->output_indices_ = lite_graph.output_indices_; - nnrt_lite_graph->all_tensors_ = lite_graph.all_tensors_; - nnrt_lite_graph->all_nodes_ = node_list; - nnrt_lite_graph->sub_graphs_ = subgraph_list; + lite_graph_->name_ = lite_graph.name_; + lite_graph_->version_ = lite_graph.version_; + lite_graph_->input_indices_ = lite_graph.input_indices_; + lite_graph_->output_indices_ = lite_graph.output_indices_; + lite_graph_->all_tensors_ = lite_graph.all_tensors_; + lite_graph_->all_nodes_ = node_list; + lite_graph_->sub_graphs_ = subgraph_list; MS_LOG(INFO) << "ShallowCopyLiteGraph success."; } 
-mindspore::NNRTDelegate::~NNRTDelegate() { - if (this->nnrt_lite_graph != nullptr) { +NNRTDelegate::~NNRTDelegate() { + if (lite_graph_ != nullptr) { MS_LOG(ERROR) << "Delete NNRTDelegate."; } -}; +} +} // namespace lite +} // namespace mindspore diff --git a/mindspore/lite/src/runtime/delegate/nnrt/nnrt_delegate.h b/mindspore/lite/src/runtime/delegate/nnrt/nnrt_delegate.h index 1be08119..48adc388 100644 --- a/mindspore/lite/src/runtime/delegate/nnrt/nnrt_delegate.h +++ b/mindspore/lite/src/runtime/delegate/nnrt/nnrt_delegate.h @@ -15,38 +15,64 @@ */ #ifndef MINDSPORE_NNR_DELEGATE_H #define MINDSPORE_NNR_DELEGATE_H + #include #include #include "include/api/delegate.h" #include "include/context.h" #include "include/model.h" +#include "nnrt_model_kernel.h" +#include "schema/model_generated.h" #include "interfaces/kits/c/neural_network_runtime_type.h" -namespace mindspore { +#include "interfaces/kits/c/neural_network_runtime.h" +#include "interfaces/innerkits/c/neural_network_runtime_inner.h" -using namespace lite; +namespace mindspore { +namespace lite { +struct NNRTOpRange { + /* NNRT kernel range in DelegateModel: [begin_iter_, end_iter_) */ + KernelIter begin_iter_; + KernelIter end_iter_; + /* NNRT node range in lite_graph_: [begin_index_, end_index_) */ + size_t begin_index_; + size_t end_index_; +}; class NNRTDelegate : public Delegate { public: - NNRTDelegate() : Delegate(){}; - + NNRTDelegate() = default; + NNRTDelegate(const NNRtDeviceInfo &nnrt_device_info) : nnrt_device_info_(nnrt_device_info) {} ~NNRTDelegate() override; - - Status Init() override; - + Status Init() override { return kSuccess; } Status Build(DelegateModel *model) override; - void ShallowCopyLiteGraph(const lite::LiteGraph &liteGraph); - - protected: - LiteGraph *nnrt_lite_graph = nullptr; + static std::vector GetNNRTSubgraphRanges(DelegateModel *model, + const std::vector &op_supports); private: - // static LiteGraph* CreateLiteGraph(const LiteGraph &liteGraph); + OH_NNModel *BuildFullNNModel(); + std::vector QueryOpSupports(OH_NNModel *nn_model); + + Status CreateLiteGraphForNNRTSubgraph( + const std::vector &nnrt_op_ranges, + std::vector *sub_lite_graphs); + Status CreateNNRTSubgraphKernels( + DelegateModel *model, + const std::vector &sub_lite_graphs, + const std::vector &nnrt_subgraph_ranges, + std::vector *nnrt_subgraph_kernels); + void ReplaceNNRTKernelsInDelegateModel(DelegateModel *model, + const std::vector &nnrt_subgraph_ranges, + const std::vector &nnrt_subgraph_kernels); Status PrepareInputs(DelegateModel *model, OH_NNExecutor *oh_nn_executor); Status PrepareOutputs(DelegateModel *model, OH_NNExecutor *oh_nn_executor); - OH_NN_DataType ConvertDataType(mindspore::DataType data_type); -}; + Status InitNNCompilation(OH_NNCompilation *nn_compilation) const; + static OH_NN_DataType CastToNNRTDataType(mindspore::DataType data_type); + NNRtDeviceInfo nnrt_device_info_; + LiteGraph *lite_graph_ = nullptr; +}; +} // namespace lite } // namespace mindspore #endif // MINDSPORE_NNR_DELEGATE_H diff --git a/mindspore/lite/src/runtime/delegate/nnrt/nnrt_stub.cc b/mindspore/lite/src/runtime/delegate/nnrt/nnrt_stub.cc index 886ac304..f2bee949 100644 --- a/mindspore/lite/src/runtime/delegate/nnrt/nnrt_stub.cc +++ b/mindspore/lite/src/runtime/delegate/nnrt/nnrt_stub.cc @@ -75,4 +75,25 @@ OH_NN_ReturnCode OH_NNModel_GetAvailableOperations(OH_NNModel *model, OH_NN_ReturnCode OH_NNModel_BuildFromLiteGraph(OH_NNModel *model, const void *liteGraph) { return OH_NN_SUCCESS; +} + +OH_NN_ReturnCode OH_NNDevice_GetName(size_t 
deviceID, const char **name) { + return OH_NN_SUCCESS; +} + +OH_NN_ReturnCode OH_NNDevice_GetType(size_t deviceID, OH_NN_DeviceType *deviceType) { + return OH_NN_SUCCESS; +} + +OH_NN_ReturnCode OH_NNCompilation_SetPriority(OH_NNCompilation *compilation, OH_NN_Priority priority) { + return OH_NN_SUCCESS; +} + +OH_NN_ReturnCode OH_NNCompilation_EnableFloat16(OH_NNCompilation *compilation, bool enableFloat16) { + return OH_NN_SUCCESS; +} + +OH_NN_ReturnCode OH_NNCompilation_SetPerformanceMode(OH_NNCompilation *compilation, + OH_NN_PerformanceMode performanceMode) { + return OH_NN_SUCCESS; } \ No newline at end of file diff --git a/mindspore/lite/src/runtime/lite_model.cc b/mindspore/lite/src/runtime/lite_model.cc index 124b4728..cd8e68d1 100644 --- a/mindspore/lite/src/runtime/lite_model.cc +++ b/mindspore/lite/src/runtime/lite_model.cc @@ -91,6 +91,8 @@ int LiteModel::ConvertSubGraph(const schema::SubGraph &sub_graph) { if (sub_graph.name() == nullptr || sub_graph.inputIndices() == nullptr || sub_graph.outputIndices() == nullptr || sub_graph.tensorIndices() == nullptr) { MS_LOG(ERROR) << "sub_graph is invalid"; + MS_LOG(ERROR) << "sub_graph.name() = " << sub_graph.name() << ", sub_graph.inputIndices() = " << sub_graph.inputIndices() + << ", sub_graph.outputIndices() = " << sub_graph.outputIndices() << ", sub_graph.tensorIndices() = " << sub_graph.tensorIndices(); return RET_ERROR; } @@ -530,6 +532,33 @@ Model *ImportFromBuffer(const char *model_buf, size_t size, bool take_buf, minds return model; } +std::string LiteGraph::ToString() const { + std::stringstream ss; + ss << "all_nodes: " << all_nodes_.size() << std::endl; + for (size_t i = 0; i < all_nodes_.size(); i++) { + ss << "- node " << i << ": " << all_nodes_[i]->primitive_ << std::endl; + ss << "- node " << i << " input_indices_: " << all_nodes_[i]->input_indices_ << std::endl; + ss << "- node " << i << " output_indices_: " << all_nodes_[i]->output_indices_ << std::endl; + } + ss << "all_tensors: " << all_tensors_.size() << std::endl; + for (size_t i = 0; i < all_tensors_.size(); i++) { + ss << "- tensor " << i << ": " << all_tensors_[i] << std::endl; + } + ss << "input_indices: " << input_indices_<< std::endl; + ss << "output_indices: " << output_indices_ << std::endl; + + ss << "subgraphs: " << std::endl; + int count = 0; + for (auto subgraph: sub_graphs_) { + ss << "- subgraph " << count++ << std::endl; + ss << "--- subgraph input " << subgraph->input_indices_ << std::endl; + ss << "--- subgraph output " << subgraph->output_indices_ << std::endl; + ss << "--- subgraph node " << subgraph->node_indices_ << std::endl; + ss << "--- subgraph tensor " << subgraph->tensor_indices_ << std::endl; + } + return ss.str(); +} + Model *Model::Import(const char *model_buf, size_t size) { return ImportFromBuffer(model_buf, size, false); } Model *Model::Import(const char *filename) { return ImportFromPath(filename); } diff --git a/mindspore/lite/src/runtime/lite_session.cc b/mindspore/lite/src/runtime/lite_session.cc index eb83f444..b8808e21 100644 --- a/mindspore/lite/src/runtime/lite_session.cc +++ b/mindspore/lite/src/runtime/lite_session.cc @@ -834,7 +834,14 @@ int LiteSession::CreateNPUDelegate() { int LiteSession::CreateNNRTDelegate() { #if SUPPORT_NNRT - delegate_ = std::make_shared(); + auto iter = std::find_if(context_->device_list_.begin(), context_->device_list_.end(), + [](DeviceContext &device) { return device.device_type_ == lite::DT_NNRT; }); + if(iter == context_->device_list_.end()) { + MS_LOG(ERROR) << "Found non NNRT device info"; 
+    return RET_ERROR;
+  }
+
+  delegate_ = std::make_shared<NNRTDelegate>(iter->device_info_.nnrt_device_info_);
   if (delegate_ == nullptr) {
     MS_LOG(ERROR) << "New NNRT delegate failed";
     return RET_ERROR;
diff --git a/mindspore/lite/src/runtime/tensor_category.cc b/mindspore/lite/src/runtime/tensor_category.cc
index 07d14de0..9a755d81 100644
--- a/mindspore/lite/src/runtime/tensor_category.cc
+++ b/mindspore/lite/src/runtime/tensor_category.cc
@@ -30,5 +30,9 @@ Category TensorCategory(const schema::Tensor &tensor) {
   auto data_size = tensor.data() == nullptr ? 0 : tensor.data()->size();
   return TensorCategory(tensor.nodeType(), shape_num, TypeId(tensor.dataType()), data_size);
 }
+
+bool IsConstTensor(const schema::Tensor &tensor) {
+  return TensorCategory(tensor) != Category::VAR;
+}
 }  // namespace lite
 }  // namespace mindspore
diff --git a/mindspore/lite/src/runtime/tensor_category.h b/mindspore/lite/src/runtime/tensor_category.h
index 34d6cc82..8dadc344 100644
--- a/mindspore/lite/src/runtime/tensor_category.h
+++ b/mindspore/lite/src/runtime/tensor_category.h
@@ -34,6 +34,7 @@ enum Category {
 
 Category TensorCategory(const int node_type, const size_t shape_num, const TypeId data_type, const size_t data_size);
 Category TensorCategory(const schema::Tensor &tensor);
+bool IsConstTensor(const schema::Tensor &tensor);
 }  // namespace lite
 }  // namespace mindspore
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_TENSOR_CATEGORY_H_
diff --git a/mindspore/lite/test/CMakeLists.txt b/mindspore/lite/test/CMakeLists.txt
index 5fa7bea0..c7c4a3cb 100644
--- a/mindspore/lite/test/CMakeLists.txt
+++ b/mindspore/lite/test/CMakeLists.txt
@@ -28,9 +28,13 @@ file(GLOB_RECURSE TEST_UT_SRC
     ${TEST_DIR}/ut/src/runtime/kernel/arm/common/*.cc
     ${TEST_DIR}/ut/src/runtime/kernel/arm/fp32/*.cc
     ${TEST_DIR}/ut/src/runtime/kernel/arm/string/*.cc
-    ${TEST_DIR}/ut/src/api/context_c_test.cc
-    ${TEST_DIR}/ut/src/api/tensor_c_test.cc
+#    ${TEST_DIR}/ut/src/api/context_c_test.cc
+#    ${TEST_DIR}/ut/src/api/tensor_c_test.cc
     )
+if(MSLITE_ENABLE_NNRT)
+    list(APPEND TEST_UT_SRC ${TEST_DIR}/ut/src/runtime/nnrt_delegate/nnrt_delegate_tests.cc)
+endif()
+
 if(MSLITE_ENABLE_SERVER_INFERENCE)
     list(APPEND TEST_UT_SRC ${TEST_DIR}/ut/src/api/model_parallel_runner_test.cc)
 endif()
@@ -85,7 +89,7 @@ endif()
 
 if(MSLITE_ENABLE_INT8)
     file(GLOB_RECURSE TEST_INT8_UT_SRC
-        ${TEST_DIR}/ut/src/runtime/kernel/arm/int8/*.cc
+#        ${TEST_DIR}/ut/src/runtime/kernel/arm/int8/*.cc
         ${TEST_DIR}/ut/nnacl/int8/*.cc
         )
     list(APPEND TEST_UT_SRC ${TEST_INT8_UT_SRC})
diff --git a/mindspore/lite/test/ut/src/runtime/nnrt_delegate/nnrt_delegate_tests.cc b/mindspore/lite/test/ut/src/runtime/nnrt_delegate/nnrt_delegate_tests.cc
new file mode 100644
index 00000000..e1ea3968
--- /dev/null
+++ b/mindspore/lite/test/ut/src/runtime/nnrt_delegate/nnrt_delegate_tests.cc
@@ -0,0 +1,59 @@
+/**
+ * Copyright 2023 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <memory>
+#include "gtest/gtest.h"
+#include "runtime/delegate/nnrt/nnrt_delegate.h"
+
+using namespace mindspore;
+using namespace mindspore::lite;
+
+void AssertOpRange(const std::vector<NNRTOpRange> &op_ranges, std::vector<std::vector<size_t>> expect) {
+  ASSERT_EQ(op_ranges.size(), expect.size());
+  for (size_t i = 0; i < op_ranges.size(); i++) {
+    ASSERT_EQ(op_ranges[i].begin_index_, expect[i][0]);
+    ASSERT_EQ(op_ranges[i].end_index_, expect[i][1]);
+  }
+}
+
+TEST(NNRTDelegateTest, GetNNRTSubgraphRanges) {
+  // Prepare DelegateModel
+  std::vector<kernel::Kernel *> kernels(5, nullptr);
+  std::vector<mindspore::MSTensor> inputs = {};
+  std::vector<mindspore::MSTensor> outputs = {};
+  std::unique_ptr<DelegateModel<schema::Primitive>> model;
+  model.reset(new DelegateModel<schema::Primitive>(&kernels, inputs, outputs, {}, SCHEMA_CUR));
+
+  std::cout << "Test case 1, expect: {[0, 1), [3, 5)}" << std::endl;
+  auto op_ranges01 = NNRTDelegate::GetNNRTSubgraphRanges(model.get(), {true, false, false, true, true});
+  AssertOpRange(op_ranges01, {{0, 1}, {3, 5}});
+
+  std::cout << "Test case 2, expect: {}" << std::endl;
+  auto op_ranges02 = NNRTDelegate::GetNNRTSubgraphRanges(model.get(), {false, false, false, false, false});
+  AssertOpRange(op_ranges02, {});
+
+  std::cout << "Test case 3, expect: {[0, 5)}" << std::endl;
+  auto op_ranges03 = NNRTDelegate::GetNNRTSubgraphRanges(model.get(), {true, true, true, true, true});
+  AssertOpRange(op_ranges03, {{0, 5}});
+
+  std::cout << "Test case 4, expect: {[0, 1), [2, 3), [4, 5)}" << std::endl;
+  auto op_ranges04 = NNRTDelegate::GetNNRTSubgraphRanges(model.get(), {true, false, true, false, true});
+  AssertOpRange(op_ranges04, {{0, 1}, {2, 3}, {4, 5}});
+
+  std::cout << "Test case 5, expect: {[1, 2), [3, 4)}" << std::endl;
+  auto op_ranges05 = NNRTDelegate::GetNNRTSubgraphRanges(model.get(), {false, true, false, true, false});
+  AssertOpRange(op_ranges05, {{1, 2}, {3, 4}});
+}
\ No newline at end of file
-- 
2.34.1