From d8ae073cf0e9f10fcbd129a145927492f65edcca Mon Sep 17 00:00:00 2001
From: Zhu Guodong <zhuguodong0001@163.com>
Date: Tue, 30 May 2023 12:17:40 +0800
Subject: [PATCH] auto-apply
 0010-nnrt-delegate-supports-heterogeneous-predition.patch
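
A minimal usage sketch of the new C API (illustrative only; assumes at least
one NNRT device is present on the target and that the surrounding context
APIs are used as usual):

    size_t num = 0;
    NNRTDeviceDesc *descs = OH_AI_GetAllNNRTDeviceDescs(&num);
    for (size_t i = 0; i < num; i++) {
      printf("NNRT device: %s\n", OH_AI_GetNameFromNNRTDeviceDesc(&descs[i]));
    }
    OH_AI_DestroyAllNNRTDeviceDescs(&descs);

    OH_AI_ContextHandle context = OH_AI_ContextCreate();
    OH_AI_DeviceInfoHandle nnrt = OH_AI_CreateNNRTDeviceInfoByType(OH_AI_NNRTDEVICE_ACCELERATOR);
    OH_AI_DeviceInfoSetPerformanceMode(nnrt, OH_AI_PERFORMANCE_HIGH);
    OH_AI_DeviceInfoSetPriority(nnrt, OH_AI_PRIORITY_MEDIUM);
    OH_AI_ContextAddDeviceInfo(context, nnrt);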

---
 include/api/context.h                         |  40 ++
 include/c_api/context_c.h                     |  85 +++
 include/c_api/types_c.h                       |  36 +
 include/sdk_api/context.h                     |  85 +++
 include/sdk_api/types.h                       |  38 +-
 mindspore/lite/include/context.h              |   4 +
 mindspore/lite/include/model.h                |   4 +
 mindspore/lite/src/runtime/c_api/context_c.cc | 196 ++++++
 mindspore/lite/src/runtime/c_api/context_c.h  |   3 +
 .../lite/src/runtime/c_api/type_c_private.h   |  40 ++
 mindspore/lite/src/runtime/cxx_api/context.cc |  68 ++
 .../lite/src/runtime/cxx_api/converters.cc    |  15 +-
 .../lite/src/runtime/cxx_api/converters.h     |   2 +-
 .../runtime/delegate/nnrt/nnrt_delegate.cc    | 633 +++++++++++++-----
 .../src/runtime/delegate/nnrt/nnrt_delegate.h |  54 +-
 .../src/runtime/delegate/nnrt/nnrt_stub.cc    |  21 +
 mindspore/lite/src/runtime/lite_model.cc      |  29 +
 mindspore/lite/src/runtime/lite_session.cc    |   9 +-
 mindspore/lite/src/runtime/tensor_category.cc |   4 +
 mindspore/lite/src/runtime/tensor_category.h  |   1 +
 mindspore/lite/test/CMakeLists.txt            |  10 +-
 .../nnrt_delegate/nnrt_delegate_tests.cc      |  59 ++
 22 files changed, 1229 insertions(+), 207 deletions(-)
 create mode 100644 mindspore/lite/src/runtime/c_api/type_c_private.h
 create mode 100644 mindspore/lite/test/ut/src/runtime/nnrt_delegate/nnrt_delegate_tests.cc

diff --git a/include/api/context.h b/include/api/context.h
index d88b9d44..4c25aa10 100644
--- a/include/api/context.h
+++ b/include/api/context.h
@@ -519,6 +519,46 @@ class MS_API NNRTDeviceInfo : public DeviceInfoContext {
   ///
   /// \return Type of this DeviceInfoContext.
   enum DeviceType GetDeviceType() const override { return DeviceType::kNNRt; };
+
+  /// \brief Set device id.
+  ///
+  /// \param[in] device_id The device id.
+  void SetDeviceID(size_t device_id);
+
+  /// \brief Get the device id.
+  ///
+  /// \return The device id.
+  size_t GetDeviceID() const;
+
+  /// \brief Set performance mode.
+  ///
+  /// \param[in] performance_mode The performance mode.
+  void SetPerformanceMode(int performance_mode);
+
+  /// \brief Get performance mode.
+  ///
+  /// \return The performance mode.
+  int GetPerformanceMode() const;
+
+  /// \brief Set priority.
+  ///
+  /// \param[in] priority The priority.
+  void SetPriority(int priority);
+
+  /// \brief Get priority.
+  ///
+  /// \return The priority.
+  int GetPriority() const;
+
+  /// \brief Set whether to enable float16 inference.
+  ///
+  /// \param[in] is_fp16 Enable float16 inference or not.
+  void SetEnableFP16(bool is_fp16);
+
+  /// \brief Get whether float16 inference is enabled.
+  ///
+  /// \return Whether float16 inference is enabled.
+  bool GetEnableFP16() const;
 };
 } // namespace mindspore
 #endif // MINDSPORE_INCLUDE_API_CONTEXT_H
diff --git a/include/c_api/context_c.h b/include/c_api/context_c.h
index 53839e80..09220f20 100644
--- a/include/c_api/context_c.h
+++ b/include/c_api/context_c.h
@@ -173,6 +173,91 @@ OH_AI_API void OH_AI_DeviceInfoSetFrequency(OH_AI_DeviceInfoHandle device_info,
 /// \return NPU frequency
 OH_AI_API int OH_AI_DeviceInfoGetFrequency(const OH_AI_DeviceInfoHandle device_info);

+/// \brief Obtain all device descriptions in NNRT.
+///
+/// \param[out] num Number of NNRT device descriptions.
+///
+/// \return NNRT device description array.
+OH_AI_API NNRTDeviceDesc *OH_AI_GetAllNNRTDeviceDescs(size_t *num);
+
+/// \brief Destroy the NNRT device descriptions returned by OH_AI_GetAllNNRTDeviceDescs().
+///
+/// \param[in] desc NNRT device description array.
+OH_AI_API void OH_AI_DestroyAllNNRTDeviceDescs(NNRTDeviceDesc **desc);
+
+/// \brief Obtain the device id in NNRT device description.
+///
+/// \param[in] desc Pointer to the NNRT device description instance.
+///
+/// \return NNRT device id.
+OH_AI_API size_t OH_AI_GetDeviceIdFromNNRTDeviceDesc(const NNRTDeviceDesc *desc);
+
+/// \brief Obtain the device name in NNRT device description.
+///
+/// \param[in] desc Pointer to the NNRT device description instance.
+///
+/// \return NNRT device name.
+OH_AI_API const char *OH_AI_GetNameFromNNRTDeviceDesc(const NNRTDeviceDesc *desc);
+
+/// \brief Obtain the device type in NNRT device description.
+///
+/// \param[in] desc Pointer to the NNRT device description instance.
+///
+/// \return NNRT device type.
+OH_AI_API OH_AI_NNRTDeviceType OH_AI_GetTypeFromNNRTDeviceDesc(const NNRTDeviceDesc *desc);
+
+/// \brief Create the NNRT device info by exactly matching the specific device name.
+///
+/// \param[in] name NNRT device name.
+///
+/// \return Device info object handle.
+OH_AI_API OH_AI_DeviceInfoHandle OH_AI_CreateNNRTDeviceInfoByName(const char *name);
+
+/// \brief Create the NNRT device info by finding the first device with the specific device type.
+///
+/// \param[in] type NNRT device type.
+///
+/// \return Device info object handle.
+OH_AI_API OH_AI_DeviceInfoHandle OH_AI_CreateNNRTDeviceInfoByType(OH_AI_NNRTDeviceType type);
+
+/// \brief Set the NNRT device id. Only valid for NNRT.
+///
+/// \param[in] device_info Device info object handle.
+/// \param[in] device_id NNRT device id.
+OH_AI_API void OH_AI_DeviceInfoSetDeviceId(OH_AI_DeviceInfoHandle device_info, size_t device_id);
+
+/// \brief Obtain the NNRT device id. Only valid for NNRT.
+///
+/// \param[in] device_info Device info object handle.
+///
+/// \return NNRT device id.
+OH_AI_API size_t OH_AI_DeviceInfoGetDeviceId(const OH_AI_DeviceInfoHandle device_info);
+
+/// \brief Set the NNRT performance mode. Only valid for NNRT.
+///
+/// \param[in] device_info Device info object handle.
+/// \param[in] mode NNRT performance mode.
+OH_AI_API void OH_AI_DeviceInfoSetPerformanceMode(OH_AI_DeviceInfoHandle device_info, OH_AI_PerformanceMode mode);
+
+/// \brief Obtain the NNRT performance mode. Only valid for NNRT.
+///
+/// \param[in] device_info Device info object handle.
+///
+/// \return NNRT performance mode.
+OH_AI_API OH_AI_PerformanceMode OH_AI_DeviceInfoGetPerformanceMode(const OH_AI_DeviceInfoHandle device_info);
+
+/// \brief Set the NNRT priority. Only valid for NNRT.
+///
+/// \param[in] device_info Device info object handle.
+/// \param[in] priority NNRT priority.
+OH_AI_API void OH_AI_DeviceInfoSetPriority(OH_AI_DeviceInfoHandle device_info, OH_AI_Priority priority);
+
+/// \brief Obtain the NNRT priority. Only valid for NNRT.
+///
+/// \param[in] device_info Device info object handle.
+///
+/// \return NNRT priority.
+OH_AI_API OH_AI_Priority OH_AI_DeviceInfoGetPriority(const OH_AI_DeviceInfoHandle device_info);
 #ifdef __cplusplus
 }
 #endif
diff --git a/include/c_api/types_c.h b/include/c_api/types_c.h
index fdf91f5a..d612eb97 100644
--- a/include/c_api/types_c.h
+++ b/include/c_api/types_c.h
@@ -44,6 +44,42 @@ typedef enum OH_AI_DeviceType {
   OH_AI_DEVICETYPE_INVALID = 100,
 } OH_AI_DeviceType;

+typedef enum OH_AI_NNRTDeviceType {
+  /** Devices that are not CPU, GPU, or dedicated accelerator */
+  OH_AI_NNRTDEVICE_OTHERS = 0,
+  /** CPU device */
+  OH_AI_NNRTDEVICE_CPU = 1,
+  /** GPU device */
+  OH_AI_NNRTDEVICE_GPU = 2,
+  /** Dedicated hardware accelerator */
+  OH_AI_NNRTDEVICE_ACCELERATOR = 3,
+} OH_AI_NNRTDeviceType;
+
+typedef enum OH_AI_PerformanceMode {
+  /** No performance mode preference */
+  OH_AI_PERFORMANCE_NONE = 0,
+  /** Low power consumption mode */
+  OH_AI_PERFORMANCE_LOW = 1,
+  /** Medium performance mode */
+  OH_AI_PERFORMANCE_MEDIUM = 2,
+  /** High performance mode */
+  OH_AI_PERFORMANCE_HIGH = 3,
+  /** Ultimate performance mode */
+  OH_AI_PERFORMANCE_EXTREME = 4
+} OH_AI_PerformanceMode;
+
+typedef enum OH_AI_Priority {
+  /** No priority preference */
+  OH_AI_PRIORITY_NONE = 0,
+  /** Low priority */
+  OH_AI_PRIORITY_LOW = 1,
+  /** Medium priority */
+  OH_AI_PRIORITY_MEDIUM = 2,
+  /** High priority */
+  OH_AI_PRIORITY_HIGH = 3
+} OH_AI_Priority;
+
+typedef struct NNRTDeviceDesc NNRTDeviceDesc;
 #ifdef __cplusplus
 }
 #endif
diff --git a/include/sdk_api/context.h b/include/sdk_api/context.h
index 5bfc9279..bf0ff0a6 100644
--- a/include/sdk_api/context.h
+++ b/include/sdk_api/context.h
@@ -174,6 +174,91 @@ OH_AI_API void OH_AI_DeviceInfoSetFrequency(OH_AI_DeviceInfoHandle device_info,
 /// \return NPU frequency
 OH_AI_API int OH_AI_DeviceInfoGetFrequency(const OH_AI_DeviceInfoHandle device_info);

+/// \brief Obtain all device descriptions in NNRT.
+///
+/// \param[out] num Number of NNRT device descriptions.
+///
+/// \return NNRT device description array.
+OH_AI_API NNRTDeviceDesc *OH_AI_GetAllNNRTDeviceDescs(size_t *num);
+
+/// \brief Destroy the NNRT device descriptions returned by OH_AI_GetAllNNRTDeviceDescs().
+///
+/// \param[in] desc NNRT device description array.
+OH_AI_API void OH_AI_DestroyAllNNRTDeviceDescs(NNRTDeviceDesc **desc);
+
+/// \brief Obtain the device id in NNRT device description.
+///
+/// \param[in] desc Pointer to the NNRT device description instance.
+///
+/// \return NNRT device id.
+OH_AI_API size_t OH_AI_GetDeviceIdFromNNRTDeviceDesc(const NNRTDeviceDesc *desc);
+
+/// \brief Obtain the device name in NNRT device description.
+///
+/// \param[in] desc Pointer to the NNRT device description instance.
+///
+/// \return NNRT device name.
+OH_AI_API const char *OH_AI_GetNameFromNNRTDeviceDesc(const NNRTDeviceDesc *desc);
+
+/// \brief Obtain the device type in NNRT device description.
+///
+/// \param[in] desc Pointer to the NNRT device description instance.
+///
+/// \return NNRT device type.
+OH_AI_API OH_AI_NNRTDeviceType OH_AI_GetTypeFromNNRTDeviceDesc(const NNRTDeviceDesc *desc);
+
+/// \brief Create the NNRT device info by exactly matching the specific device name.
+///
+/// \param[in] name NNRT device name.
+///
+/// \return Device info object handle.
+OH_AI_API OH_AI_DeviceInfoHandle OH_AI_CreateNNRTDeviceInfoByName(const char *name);
+
+/// \brief Create the NNRT device info by finding the first device with the specific device type.
+///
+/// \param[in] type NNRT device type.
+///
+/// \return Device info object handle.
+OH_AI_API OH_AI_DeviceInfoHandle OH_AI_CreateNNRTDeviceInfoByType(OH_AI_NNRTDeviceType type);
+
+/// \brief Set the NNRT device id. Only valid for NNRT.
+///
+/// \param[in] device_info Device info object handle.
+/// \param[in] device_id NNRT device id.
+OH_AI_API void OH_AI_DeviceInfoSetDeviceId(OH_AI_DeviceInfoHandle device_info, size_t device_id);
+
+/// \brief Obtain the NNRT device id. Only valid for NNRT.
+///
+/// \param[in] device_info Device info object handle.
+///
+/// \return NNRT device id.
+OH_AI_API size_t OH_AI_DeviceInfoGetDeviceId(const OH_AI_DeviceInfoHandle device_info);
+
+/// \brief Set the NNRT performance mode. Only valid for NNRT.
+///
+/// \param[in] device_info Device info object handle.
+/// \param[in] mode NNRT performance mode.
+OH_AI_API void OH_AI_DeviceInfoSetPerformanceMode(OH_AI_DeviceInfoHandle device_info, OH_AI_PerformanceMode mode);
+
+/// \brief Obtain the NNRT performance mode. Only valid for NNRT.
+///
+/// \param[in] device_info Device info object handle.
+///
+/// \return NNRT performance mode.
+OH_AI_API OH_AI_PerformanceMode OH_AI_DeviceInfoGetPerformanceMode(const OH_AI_DeviceInfoHandle device_info);
+
+/// \brief Set the NNRT priority. Only valid for NNRT.
+///
+/// \param[in] device_info Device info object handle.
+/// \param[in] priority NNRT priority.
+OH_AI_API void OH_AI_DeviceInfoSetPriority(OH_AI_DeviceInfoHandle device_info, OH_AI_Priority priority);
+
+/// \brief Obtain the NNRT priority. Only valid for NNRT.
+///
+/// \param[in] device_info Device info object handle.
+///
+/// \return NNRT priority.
+OH_AI_API OH_AI_Priority OH_AI_DeviceInfoGetPriority(const OH_AI_DeviceInfoHandle device_info);
 #ifdef __cplusplus
 }
 #endif
diff --git a/include/sdk_api/types.h b/include/sdk_api/types.h
index a39c6daa..d38660b0 100644
--- a/include/sdk_api/types.h
+++ b/include/sdk_api/types.h
@@ -40,10 +40,46 @@ typedef enum OH_AI_DeviceType {
   OH_AI_DEVICETYPE_KIRIN_NPU,
   // add new type here
   // ohos-only device range: [60, 80)
-  OH_AI_DeviceType_NNRT = 60,
+  OH_AI_DEVICETYPE_NNRT = 60,
   OH_AI_DEVICETYPE_INVALID = 100,
 } OH_AI_DeviceType;

+typedef enum OH_AI_NNRTDeviceType {
+  /** Devices that are not CPU, GPU, or dedicated accelerator */
+  OH_AI_NNRTDEVICE_OTHERS = 0,
+  /** CPU device */
+  OH_AI_NNRTDEVICE_CPU = 1,
+  /** GPU device */
+  OH_AI_NNRTDEVICE_GPU = 2,
+  /** Dedicated hardware accelerator */
+  OH_AI_NNRTDEVICE_ACCELERATOR = 3,
+} OH_AI_NNRTDeviceType;
+
+typedef enum OH_AI_PerformanceMode {
+  /** No performance mode preference */
+  OH_AI_PERFORMANCE_NONE = 0,
+  /** Low power consumption mode */
+  OH_AI_PERFORMANCE_LOW = 1,
+  /** Medium performance mode */
+  OH_AI_PERFORMANCE_MEDIUM = 2,
+  /** High performance mode */
+  OH_AI_PERFORMANCE_HIGH = 3,
+  /** Ultimate performance mode */
+  OH_AI_PERFORMANCE_EXTREME = 4
+} OH_AI_PerformanceMode;
+
+typedef enum OH_AI_Priority {
+  /** No priority preference */
+  OH_AI_PRIORITY_NONE = 0,
+  /** Low priority */
+  OH_AI_PRIORITY_LOW = 1,
+  /** Medium priority */
+  OH_AI_PRIORITY_MEDIUM = 2,
+  /** High priority */
+  OH_AI_PRIORITY_HIGH = 3
+} OH_AI_Priority;
+
+typedef struct NNRTDeviceDesc NNRTDeviceDesc;
 #ifdef __cplusplus
 }
 #endif
diff --git a/mindspore/lite/include/context.h b/mindspore/lite/include/context.h
index 22bd24df..7e1e06f3 100644
--- a/mindspore/lite/include/context.h
+++ b/mindspore/lite/include/context.h
@@ -52,6 +52,10 @@ typedef struct AscendDeviceInfo {
 } AscendDeviceInfo;

 typedef struct NNRtDeviceInfo {
+  uint32_t device_id_ = 0;
+  int priority_ = 0;
+  int performance_mode_ = 0;
+  bool enable_fp16_ = false;
 } NNRtDeviceInfo;

 /// \brief DeviceInfo defined for backend's configuration information.
diff --git a/mindspore/lite/include/model.h b/mindspore/lite/include/model.h
index 44cba37b..a54904c8 100644
--- a/mindspore/lite/include/model.h
+++ b/mindspore/lite/include/model.h
@@ -24,6 +24,7 @@ namespace mindspore {
 namespace schema {
 struct Tensor;
 } // namespace schema
+
 namespace lite {
 typedef enum { ModelType_MSLite, ModelType_MindIR } LiteModelType;

@@ -61,7 +62,10 @@ struct LiteGraph {
   bool model_obfuscated_ = false;
   std::vector<unsigned char *> deobf_prims_;
 #endif
+
+  std::string ToString() const;
 };
+
 struct Model {
   LiteGraph graph_;
   char *buf = nullptr;
diff --git a/mindspore/lite/src/runtime/c_api/context_c.cc b/mindspore/lite/src/runtime/c_api/context_c.cc
index d030e931..2a0a6d06 100644
--- a/mindspore/lite/src/runtime/c_api/context_c.cc
+++ b/mindspore/lite/src/runtime/c_api/context_c.cc
@@ -14,8 +14,13 @@
  * limitations under the License.
  */
 #include "include/c_api/context_c.h"
+#include <string.h>
 #include "src/runtime/c_api/context_c.h"
+#include "src/runtime/c_api/type_c_private.h"
 #include "src/common/log_adapter.h"
+#ifdef SUPPORT_NNRT
+#include "interfaces/kits/c/neural_network_runtime.h"
+#endif

 // ================ Context ================
 OH_AI_ContextHandle OH_AI_ContextCreate() {
@@ -238,3 +243,194 @@ int OH_AI_DeviceInfoGetFrequency(const OH_AI_DeviceInfoHandle device_info) {  //
     return -1;
   }
 }
+
+NNRTDeviceDesc *OH_AI_GetAllNNRTDeviceDescs(size_t *num) {
+  if (num == nullptr) {
+    MS_LOG(ERROR) << "Input num is null";
+    return nullptr;
+  }
+#ifdef SUPPORT_NNRT
+  *num = 0;
+
+  const size_t *all_device_ids;
+  uint32_t device_count;
+  auto ret = OH_NNDevice_GetAllDevicesID(&all_device_ids, &device_count);
+  if ((ret != OH_NN_SUCCESS) || (device_count == 0)) {
+    MS_LOG(ERROR) << "NNRT get all device id failed, ret: " << ret;
+    return nullptr;
+  }
+
+  NNRTDeviceDesc *desc = (NNRTDeviceDesc *)malloc(sizeof(NNRTDeviceDesc) * device_count);
+  if (desc == nullptr) {
+    MS_LOG(ERROR) << "NNRT allocate desc failed";
+    return nullptr;
+  }
+
+  for (uint32_t i = 0; i < device_count; i++) {
+    desc[i].device_id = all_device_ids[i];
+    OH_NN_DeviceType type;
+    (void)OH_NNDevice_GetType(all_device_ids[i], &type);
+    desc[i].device_type = static_cast<OH_AI_NNRTDeviceType>(type);
+
+    const char *name = nullptr;
+    (void)OH_NNDevice_GetName(all_device_ids[i], &name);
+    desc[i].device_name[NNRT_DEVICE_NAME_MAX - 1] = '\0';
+    strncpy(desc[i].device_name, name, NNRT_DEVICE_NAME_MAX - 1);
+  }
+  *num = device_count;
+  return desc;
+#else
+  return nullptr;
+#endif
+}
+
+void OH_AI_DestroyAllNNRTDeviceDescs(NNRTDeviceDesc **desc) {
+  if (desc == nullptr) {
+    MS_LOG(WARNING) << "desc is null";
+    return;
+  }
+  free(*desc);
+  *desc = nullptr;
+}
+
+size_t OH_AI_GetDeviceIdFromNNRTDeviceDesc(const NNRTDeviceDesc *desc) {
+  if (desc == nullptr) {
+    MS_LOG(ERROR) << "NNRT desc is null";
+    return 0;
+  }
+  return desc->device_id;
+}
+
+const char *OH_AI_GetNameFromNNRTDeviceDesc(const NNRTDeviceDesc *desc) {
+  if (desc == nullptr) {
+    MS_LOG(ERROR) << "NNRT desc is null";
+    return nullptr;
+  }
+  return desc->device_name;
+}
+
+OH_AI_NNRTDeviceType OH_AI_GetTypeFromNNRTDeviceDesc(const NNRTDeviceDesc *desc) {
+  if (desc == nullptr) {
+    MS_LOG(ERROR) << "NNRT desc is null";
+    return OH_AI_NNRTDeviceType::OH_AI_NNRTDEVICE_OTHERS;
+  }
+  return desc->device_type;
+}
+
+OH_AI_DeviceInfoHandle OH_AI_CreateNNRTDeviceInfoByName(const char *name) {
+  size_t num = 0;
+  NNRTDeviceDesc *desc = OH_AI_GetAllNNRTDeviceDescs(&num);
+  if (desc == nullptr) {
+    MS_LOG(ERROR) << "Get all device desc failed";
+    return nullptr;
+  }
+
+  OH_AI_DeviceInfoHandle handle = nullptr;
+  for (size_t i = 0; i < num; i++) {
+    if (strncmp(desc[i].device_name, name, NNRT_DEVICE_NAME_MAX - 1) == 0) {
+      handle = OH_AI_DeviceInfoCreate(OH_AI_DEVICETYPE_NNRT);
+      OH_AI_DeviceInfoSetDeviceId(handle, desc[i].device_id);
+      break;
+    }
+  }
+  OH_AI_DestroyAllNNRTDeviceDescs(&desc);
+  return handle;
+}
+
+OH_AI_DeviceInfoHandle OH_AI_CreateNNRTDeviceInfoByType(OH_AI_NNRTDeviceType type) {
+  size_t num = 0;
+  NNRTDeviceDesc *desc = OH_AI_GetAllNNRTDeviceDescs(&num);
+  if (desc == nullptr) {
+    MS_LOG(ERROR) << "Get all device desc failed";
+    return nullptr;
+  }
+
+  OH_AI_DeviceInfoHandle handle = nullptr;
+  for (size_t i = 0; i < num; i++) {
+    if (desc[i].device_type == type) {
+      handle = OH_AI_DeviceInfoCreate(OH_AI_DEVICETYPE_NNRT);
+      OH_AI_DeviceInfoSetDeviceId(handle, desc[i].device_id);
+      break;
+    }
+  }
+  OH_AI_DestroyAllNNRTDeviceDescs(&desc);
+  return handle;
+}
+
+void OH_AI_DeviceInfoSetDeviceId(OH_AI_DeviceInfoHandle device_info, size_t device_id) {
+  if (device_info == nullptr) {
+    MS_LOG(ERROR) << "device info is null";
+    return;
+  }
+  if (OH_AI_DeviceInfoGetDeviceType(device_info) != OH_AI_DEVICETYPE_NNRT) {
+    MS_LOG(ERROR) << "Setting device_id of a non-NNRT device is not allowed, ignored";
+    return;
+  }
+  auto impl = reinterpret_cast<mindspore::DeviceInfoC *>(device_info);
+  impl->device_id = device_id;
+}
+
+size_t OH_AI_DeviceInfoGetDeviceId(const OH_AI_DeviceInfoHandle device_info) {
+  if (device_info == nullptr) {
+    MS_LOG(ERROR) << "device info is null";
+    return 0;
+  }
+  if (OH_AI_DeviceInfoGetDeviceType(device_info) != OH_AI_DEVICETYPE_NNRT) {
+    MS_LOG(ERROR) << "Getting device_id of a non-NNRT device is not allowed, ignored";
+    return 0;
+  }
+  auto impl = reinterpret_cast<mindspore::DeviceInfoC *>(device_info);
+  return impl->device_id;
+}
+
+void OH_AI_DeviceInfoSetPerformanceMode(OH_AI_DeviceInfoHandle device_info, OH_AI_PerformanceMode mode) {
+  if (device_info == nullptr) {
+    MS_LOG(ERROR) << "device info is null";
+    return;
+  }
+  if (OH_AI_DeviceInfoGetDeviceType(device_info) != OH_AI_DEVICETYPE_NNRT) {
+    MS_LOG(ERROR) << "Setting performance_mode of a non-NNRT device is not allowed, ignored";
+    return;
+  }
+  auto impl = reinterpret_cast<mindspore::DeviceInfoC *>(device_info);
+  impl->performance_mode = mode;
+}
+
+OH_AI_PerformanceMode OH_AI_DeviceInfoGetPerformanceMode(const OH_AI_DeviceInfoHandle device_info) {
+  if (device_info == nullptr) {
+    MS_LOG(ERROR) << "device info is null";
+    return OH_AI_PERFORMANCE_NONE;
+  }
+  if (OH_AI_DeviceInfoGetDeviceType(device_info) != OH_AI_DEVICETYPE_NNRT) {
+    MS_LOG(ERROR) << "Getting performance_mode of a non-NNRT device is not allowed, ignored";
+    return OH_AI_PERFORMANCE_NONE;
+  }
+  auto impl = reinterpret_cast<mindspore::DeviceInfoC *>(device_info);
+  return impl->performance_mode;
+}
+
+void OH_AI_DeviceInfoSetPriority(OH_AI_DeviceInfoHandle device_info, OH_AI_Priority priority) {
+  if (device_info == nullptr) {
+    MS_LOG(ERROR) << "device info is null";
+    return;
+  }
+  if (OH_AI_DeviceInfoGetDeviceType(device_info) != OH_AI_DEVICETYPE_NNRT) {
+    MS_LOG(ERROR) << "Setting priority of a non-NNRT device is not allowed, ignored";
+    return;
+  }
+  auto impl = reinterpret_cast<mindspore::DeviceInfoC *>(device_info);
+  impl->priority = priority;
+}
+
+OH_AI_Priority OH_AI_DeviceInfoGetPriority(const OH_AI_DeviceInfoHandle device_info) {
+  if (device_info == nullptr) {
+    MS_LOG(ERROR) << "device info is null";
+    return OH_AI_PRIORITY_NONE;
+  }
+  if (OH_AI_DeviceInfoGetDeviceType(device_info) != OH_AI_DEVICETYPE_NNRT) {
+    MS_LOG(ERROR) << "Getting priority of a non-NNRT device is not allowed, ignored";
+    return OH_AI_PRIORITY_NONE;
+  }
+  auto impl = reinterpret_cast<mindspore::DeviceInfoC *>(device_info);
+  return impl->priority;
+}
diff --git a/mindspore/lite/src/runtime/c_api/context_c.h b/mindspore/lite/src/runtime/c_api/context_c.h
index 7b9db3ea..0fb2f3e7 100644
--- a/mindspore/lite/src/runtime/c_api/context_c.h
+++ b/mindspore/lite/src/runtime/c_api/context_c.h
@@ -29,6 +29,9 @@ typedef struct DeviceInfoC {
   OH_AI_DeviceType device_type;
   bool enable_fp16 = false;
   int frequency = 3;
+  size_t device_id = 0;
+  OH_AI_PerformanceMode performance_mode = OH_AI_PERFORMANCE_NONE;
+  OH_AI_Priority priority = OH_AI_PRIORITY_NONE;
   std::string provider;
   std::string provider_device;
   std::shared_ptr<Allocator> allocator = nullptr;
diff --git a/mindspore/lite/src/runtime/c_api/type_c_private.h b/mindspore/lite/src/runtime/c_api/type_c_private.h
new file mode 100644
index 00000000..b3b64748
--- /dev/null
+++ b/mindspore/lite/src/runtime/c_api/type_c_private.h
@@ -0,0 +1,40 @@
+/**
+ * Copyright 2023 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_C_API_TYPE_C_PRIVATE_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_C_API_TYPE_C_PRIVATE_H_
+
+#include <string>
+#include <vector>
+#include <memory>
+#include <stddef.h>
+#include "include/c_api/types_c.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define NNRT_DEVICE_NAME_MAX (128)
+
+struct NNRTDeviceDesc {
+  size_t device_id;
+  OH_AI_NNRTDeviceType device_type;
+  char device_name[NNRT_DEVICE_NAME_MAX];
+};
+
+#ifdef __cplusplus
+}
+#endif
+#endif // MINDSPORE_LITE_SRC_RUNTIME_C_API_TYPE_C_PRIVATE_H_
diff --git a/mindspore/lite/src/runtime/cxx_api/context.cc b/mindspore/lite/src/runtime/cxx_api/context.cc
index d550975b..6ac926b9 100644
--- a/mindspore/lite/src/runtime/cxx_api/context.cc
+++ b/mindspore/lite/src/runtime/cxx_api/context.cc
@@ -48,6 +48,10 @@ constexpr auto KModelOptionAscendFusionSwitchCfgPath = "mindspore.option.ascend.
 constexpr auto kModelOptionAscendDynamicBatchSize = "mindspore.option.ascend.dynamic_batch_size";
 constexpr auto kModelOptionAscendDynamicImageSize = "mindspore.option.ascend.dynamic_image_size";
 constexpr auto kModelOptionAscendBufferOptimize = "mindspore.option.ascend.buffer_optimize";
+constexpr auto kModelOptionNNRTDeviceID = "mindspore.option.nnrt.device_id";
+constexpr auto kModelOptionNNRTPerformanceMode = "mindspore.option.nnrt.performance_mode";
+constexpr auto kModelOptionNNRTPriority = "mindspore.option.nnrt.priority";
+constexpr auto kModelOptionNNRTEnableFP16 = "mindspore.option.nnrt.enable_fp16";

 Context::Context() : data_(std::make_shared<Data>()) {}

@@ -586,4 +590,68 @@ std::vector<char> AscendDeviceInfo::GetBufferOptimizeModeChar() const {
   const std::string &ref = GetValue<std::string>(data_, kModelOptionAscendBufferOptimize);
   return StringToChar(ref);
 }
+
+void NNRTDeviceInfo::SetDeviceID(size_t device_id) {
+  if (data_ == nullptr) {
+    MS_LOG(ERROR) << "Invalid context.";
+    return;
+  }
+  data_->params[kModelOptionNNRTDeviceID] = device_id;
+}
+
+size_t NNRTDeviceInfo::GetDeviceID() const {
+  if (data_ == nullptr) {
+    MS_LOG(ERROR) << "Invalid context.";
+    return 0;
+  }
+  return GetValue<size_t>(data_, kModelOptionNNRTDeviceID);
+}
+
+void NNRTDeviceInfo::SetPerformanceMode(int performance_mode) {
+  if (data_ == nullptr) {
+    MS_LOG(ERROR) << "Invalid context.";
+    return;
+  }
+  data_->params[kModelOptionNNRTPerformanceMode] = performance_mode;
+}
+
+int NNRTDeviceInfo::GetPerformanceMode() const {
+  if (data_ == nullptr) {
+    MS_LOG(ERROR) << "Invalid context.";
+    return 0;
+  }
+  return GetValue<int>(data_, kModelOptionNNRTPerformanceMode);
+}
+
+void NNRTDeviceInfo::SetPriority(int priority) {
+  if (data_ == nullptr) {
+    MS_LOG(ERROR) << "Invalid context.";
+    return;
+  }
+  data_->params[kModelOptionNNRTPriority] = priority;
+}
+
+int NNRTDeviceInfo::GetPriority() const {
+  if (data_ == nullptr) {
+    MS_LOG(ERROR) << "Invalid context.";
+    return 0;
+  }
+  return GetValue<int>(data_, kModelOptionNNRTPriority);
+}
+
+void NNRTDeviceInfo::SetEnableFP16(bool is_fp16) {
+  if (data_ == nullptr) {
+    MS_LOG(ERROR) << "Invalid context.";
+    return;
+  }
+  data_->params[kModelOptionNNRTEnableFP16] = is_fp16;
+}
+
+bool NNRTDeviceInfo::GetEnableFP16() const {
+  if (data_ == nullptr) {
+    MS_LOG(ERROR) << "Invalid context.";
+    return false;
+  }
+  return GetValue<bool>(data_, kModelOptionNNRTEnableFP16);
+}
 } // namespace mindspore
diff --git a/mindspore/lite/src/runtime/cxx_api/converters.cc b/mindspore/lite/src/runtime/cxx_api/converters.cc
index 23a02778..5f2bd40f 100644
--- a/mindspore/lite/src/runtime/cxx_api/converters.cc
+++ b/mindspore/lite/src/runtime/cxx_api/converters.cc
@@ -72,8 +72,13 @@ Status ContextUtils::AddAscendDevice(lite::InnerContext *inner_context, DeviceIn
   return kSuccess;
 }

-Status ContextUtils::AddNNRtDevice(lite::InnerContext *inner_context) {
+Status ContextUtils::AddNNRtDevice(lite::InnerContext *inner_context, size_t device_id, int performance_mode,
+                                   int priority, bool enable_fp16) {
   lite::DeviceInfo device_info = {0};
+  device_info.nnrt_device_info_.device_id_ = device_id;
+  device_info.nnrt_device_info_.performance_mode_ = performance_mode;
+  device_info.nnrt_device_info_.priority_ = priority;
+  device_info.nnrt_device_info_.enable_fp16_ = enable_fp16;
   inner_context->device_list_.push_back({lite::DT_NNRT, device_info});
   return kSuccess;
 }
@@ -122,7 +127,10 @@ lite::InnerContext *ContextUtils::Convert(Context *context) {
     } else if (device->GetDeviceType() == kAscend) {
       ret = AddAscendDevice(inner_context.get(), device.get());
     } else if (device->GetDeviceType() == kNNRt) {
-      ret = AddNNRtDevice(inner_context.get());
+      auto nnrt_device_info = device->Cast<NNRTDeviceInfo>();
+      ret = AddNNRtDevice(inner_context.get(), nnrt_device_info->GetDeviceID(),
+                          nnrt_device_info->GetPerformanceMode(), nnrt_device_info->GetPriority(),
+                          nnrt_device_info->GetEnableFP16());
     }
     if (ret != kSuccess) {
       MS_LOG(ERROR) << "Add device failed!";
@@ -162,7 +170,8 @@ lite::InnerContext *ContextUtils::Convert(const ContextC *context_c) {
     } else if (device_info_c->device_type == OH_AI_DEVICETYPE_KIRIN_NPU) {
       ret = AddNpuDevice(device_info_c->frequency, inner_context.get());
     } else if (device_info_c->device_type == OH_AI_DEVICETYPE_NNRT) {
-      ret = AddNNRtDevice(inner_context.get());
+      ret = AddNNRtDevice(inner_context.get(), device_info_c->device_id, device_info_c->performance_mode,
+                          device_info_c->priority, device_info_c->enable_fp16);
     }
     if (ret != kSuccess) {
       MS_LOG(ERROR) << "Add device failed!";
diff --git a/mindspore/lite/src/runtime/cxx_api/converters.h b/mindspore/lite/src/runtime/cxx_api/converters.h
index 11338875..bd7daabb 100644
--- a/mindspore/lite/src/runtime/cxx_api/converters.h
+++ b/mindspore/lite/src/runtime/cxx_api/converters.h
@@ -45,7 +45,7 @@ class ContextUtils {
                               lite::InnerContext *inner_context);
   static Status AddNpuDevice(int frequency, lite::InnerContext *inner_context);
   static Status AddAscendDevice(lite::InnerContext *inner_context, DeviceInfoContext *device);
-  static Status AddNNRtDevice(lite::InnerContext *inner_context);
+  static Status AddNNRtDevice(lite::InnerContext *inner_context, size_t device_id, int performance_mode, int priority, bool enable_fp16);
   static bool IsAffinityModeValid(int affinity_mode) {
     return affinity_mode >= lite::NO_BIND && affinity_mode <= lite::MID_CPU;
   }
diff --git a/mindspore/lite/src/runtime/delegate/nnrt/nnrt_delegate.cc b/mindspore/lite/src/runtime/delegate/nnrt/nnrt_delegate.cc
index 67d4e6c4..8b6e5ba4 100644
--- a/mindspore/lite/src/runtime/delegate/nnrt/nnrt_delegate.cc
+++ b/mindspore/lite/src/runtime/delegate/nnrt/nnrt_delegate.cc
@@ -13,6 +13,9 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
+#include <unordered_set>
+#include <numeric>
 #include "nnrt_delegate.h"
 #include "checker/primitive_check.h"
 #include "src/common/log_adapter.h"
@@ -21,139 +24,432 @@
 #include "nnrt_model_kernel.h"
 #include "schema/model_generated.h"
 #include "flatbuffers/flatbuffers.h"
+#include "runtime/tensor_category.h"

+namespace mindspore {
+namespace lite {
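+// Heterogeneous schedule: query per-op support from NNRT, split the model into
+// maximal NNRT-supported kernel ranges, compile one NNRT subgraph kernel per
+// range, and leave the remaining ops on the default CPU kernels.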
+Status NNRTDelegate::Build(DelegateModel<schema::Primitive> *model) {
   MS_LOG(DEBUG) << "Start to build NNRT model.";
-  if (this->nnrt_lite_graph == nullptr) {
-    MS_LOG(ERROR) << "nnrt_lite_graph is nullptr.";
-    return mindspore::kLiteError;
+  if ((lite_graph_ == nullptr) || (lite_graph_->sub_graphs_.size() > 1)) {
+    MS_LOG(WARNING) << "Lite graph is null or contains more than one subgraph. NNRT does not support control-flow models yet, fall back to CPU";
+    return kSuccess;
   }
-  if (this->nnrt_lite_graph->sub_graphs_.empty()) {
-    // must have at lease one subgraph
-    MS_LOG(ERROR) << "must have at lease one subgraph";
-    return mindspore::kLiteError;
+
+  OH_NNModel *full_model = BuildFullNNModel();
+  if (full_model == nullptr) {
+    MS_LOG(WARNING) << "Build full NNModel failed, fall back to CPU";
+    return kSuccess;
   }
-  OH_NN_ReturnCode ret_code;
-  OH_NNModel *oh_nnmodel = OH_NNModel_Construct();
-  if (oh_nnmodel == nullptr) {
-    MS_LOG(ERROR) << "Construct NNModel failed, oh_nnmodel is nullptr.";
-    return mindspore::kLiteError;
+  std::vector<bool> op_supports = QueryOpSupports(full_model);
+  if (op_supports.empty()) {
+    MS_LOG(WARNING) << "Queried no op support info for the full model, fall back to CPU";
+    OH_NNModel_Destroy(&full_model);
+    return kSuccess;
   }
+  auto nnrt_subgraph_ranges = GetNNRTSubgraphRanges(model, op_supports);
+  MS_LOG(INFO) << "Found NNRT subgraph count: " << nnrt_subgraph_ranges.size();

-  ret_code = OH_NNModel_BuildFromLiteGraph(oh_nnmodel, this->nnrt_lite_graph);
-  if (ret_code != OH_NN_SUCCESS) {
-    MS_LOG(ERROR) << "Build NNModel failed, OH_NN_ReturnCode = " << ret_code;
-    OH_NNModel_Destroy(&oh_nnmodel);
-    return mindspore::kLiteError;
+  std::vector<LiteGraph *> sub_lite_graphs;
+  auto ret = CreateLiteGraphForNNRTSubgraph(nnrt_subgraph_ranges, &sub_lite_graphs);
+  if (ret != kSuccess) {
+    OH_NNModel_Destroy(&full_model);
+    MS_LOG(WARNING) << "Create NNRT sub LiteGraph failed, fall back to CPU";
+    return kSuccess;
   }
-  MS_LOG(DEBUG) << "NNRTDelegate creates NNModel success.";

-  OH_NNCompilation *oh_nn_compilation = nullptr;
-  oh_nn_compilation = OH_NNCompilation_Construct(oh_nnmodel);
+  std::vector<NNRTModelKernel *> nnrt_subgraph_kernels;
+  ret = CreateNNRTSubgraphKernels(model, sub_lite_graphs, nnrt_subgraph_ranges, &nnrt_subgraph_kernels);
+  if (ret != kSuccess) {
+    OH_NNModel_Destroy(&full_model);
+    MS_LOG(WARNING) << "Create NNRT subgraph kernel failed, fall back to CPU";
+    return kSuccess;
+  }
+
+  ReplaceNNRTKernelsInDelegateModel(model, nnrt_subgraph_ranges, nnrt_subgraph_kernels);
+  OH_NNModel_Destroy(&full_model);
+  MS_LOG(INFO) << "NNRTDelegate build success.";
+  return kSuccess;
+}
+
+OH_NNModel *NNRTDelegate::BuildFullNNModel() {
+  if (lite_graph_ == nullptr) {
+    MS_LOG(ERROR) << "Lite graph is null";
+    return nullptr;
+  }

-  if (oh_nn_compilation == nullptr) {
-    MS_LOG(ERROR) << "Construct NNCompilation failed";
-    OH_NNModel_Destroy(&oh_nnmodel);
-    return mindspore::kLiteError;
+  if (lite_graph_->sub_graphs_.empty()) {
+    MS_LOG(ERROR) << "Lite graph must have at least one subgraph";
+    return nullptr;
   }
-  MS_LOG(DEBUG) << "NNRTDelegate creates NNCompilation success.";

-  const size_t *allDevicesID = nullptr;
-  uint32_t device_count = 0;
-  ret_code = OH_NNDevice_GetAllDevicesID(&allDevicesID, &device_count);
-  if (ret_code != OH_NN_SUCCESS) {
-    MS_LOG(ERROR) << "NNModel GetAllDevicesID failed, OH_NN_ReturnCode = " << ret_code;
-    OH_NNCompilation_Destroy(&oh_nn_compilation);
-    OH_NNModel_Destroy(&oh_nnmodel);
-    return mindspore::kLiteError;
+  OH_NNModel *nn_model = OH_NNModel_Construct();
+  if (nn_model == nullptr) {
+    MS_LOG(ERROR) << "Create NNModel failed, result is nullptr";
+    return nullptr;
   }

-  if (device_count <= 0) {
-    MS_LOG(WARNING) << "No NNRt Device found, fall back to CPU. ";
-    // OH_NNCompilation_Destroy(&oh_nn_compilation);
-    // OH_NNModel_Destroy(&oh_nnmodel);
-    return mindspore::kSuccess;
+  auto ret = OH_NNModel_BuildFromLiteGraph(nn_model, lite_graph_);
+  if (ret != OH_NN_SUCCESS) {
+    MS_LOG(ERROR) << "Build NNModel failed, ret: " << ret;
+    OH_NNModel_Destroy(&nn_model);
+    return nullptr;
   }
-  MS_LOG(DEBUG) << "NNRTDelegate GetAllDevicesID success. device_count: " << device_count;
+  return nn_model;
+}

-  // check if model ops are supported
-  const bool *issupported = nullptr;
+std::vector<bool> NNRTDelegate::QueryOpSupports(OH_NNModel *nn_model) {
+  const bool *is_supported = nullptr; // Note: this memory is owned by nn_model; do not free it separately.
   uint32_t op_count = 0;
-  ret_code = OH_NNModel_GetAvailableOperations(oh_nnmodel, allDevicesID[0], &issupported, &op_count);
-  if (ret_code != OH_NN_SUCCESS) {
-    MS_LOG(ERROR) << "NNModel GetAvailableOperations failed, OH_NN_ReturnCode = " << ret_code
-                  << ", maybe due to dataParcel data length limitaion. Fall back to CPU.";
-    OH_NNCompilation_Destroy(&oh_nn_compilation);
-    OH_NNModel_Destroy(&oh_nnmodel);
-    return mindspore::kSuccess;
+  auto ret = OH_NNModel_GetAvailableOperations(nn_model, nnrt_device_info_.device_id_, &is_supported, &op_count);
+  if (ret != OH_NN_SUCCESS) {
+    MS_LOG(WARNING) << "NNModel GetAvailableOperations failed, ret: " << ret
+                    << ", maybe caused by dataParcel data length limitation";
+    return {};
   }
-  uint32_t supported_op_count = 0;
-  for (uint32_t i = 0; i < op_count; i++) {
-    if (issupported[i]) {
-      supported_op_count++;
+  std::vector<bool> op_supports(is_supported, is_supported + op_count);
+  return op_supports;
+}
+
+/* Find continuous sub-sequences in op_supports. */
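+/* Example: op_supports = {true, true, false, true} yields the NNRT subgraph
+ * ranges [0, 2) and [3, 4); op 2 stays on the CPU. */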
+std::vector<NNRTOpRange> NNRTDelegate::GetNNRTSubgraphRanges(DelegateModel<schema::Primitive> *model,
+                                                             const std::vector<bool> &op_supports) {
+  std::vector<NNRTOpRange> nnrt_subgraph_ranges;
+  NNRTOpRange op_range;
+  bool start_count = false;
+  for (size_t i = 0; i < op_supports.size(); i++) {
+    if (op_supports[i]) {
+      if (start_count == false) {
+        start_count = true;
+        op_range.begin_index_ = i;
+        op_range.begin_iter_ = model->BeginKernelIterator() + i;
+      }
+    } else {
+      if (start_count == true) {
+        start_count = false;
+        op_range.end_index_ = i;
+        op_range.end_iter_ = model->BeginKernelIterator() + i;
+        nnrt_subgraph_ranges.push_back(op_range);
+      }
     }
   }
-  if (op_count != supported_op_count) {
-    MS_LOG(WARNING) << "this model has " << op_count << "ops, but NNRT only support " << supported_op_count
-                    << " ops, fall back to CPU.";
-    // must support all op, else fall back to CPU
-    OH_NNCompilation_Destroy(&oh_nn_compilation);
-    OH_NNModel_Destroy(&oh_nnmodel);
-    return mindspore::kSuccess;
+  // handle the last true sub-sequence
+  if (start_count == true) {
+    op_range.end_index_ = op_supports.size();
+    op_range.end_iter_ = model->EndKernelIterator();
+    nnrt_subgraph_ranges.push_back(op_range);
+    MS_LOG(INFO) << "Schedule NNRT subgraph range: [" << op_range.begin_index_ << ", " << op_range.end_index_ << ")";
   }
-  MS_LOG(DEBUG) << "NNRtDelegate supports all op in this model.";
+  return nnrt_subgraph_ranges;
+}

-  ret_code = OH_NNCompilation_SetDevice(oh_nn_compilation, allDevicesID[0]);
+/**
+ * This method ONLY works when the following pre-conditions are satisfied:
+ * 1. The node order of lite_graph_->all_nodes_ should be consistent with the DelegateModel kernel sequence.
+ *    This ensures the kernel replacement in DelegateModel based on the re-organizing info from lite_graph_ is correct.
+ * 2. The node indices of lite_graph_->sub_graphs[0].node_indices should be monotonically increasing from 0 to size - 1.
+ */
+Status NNRTDelegate::CreateLiteGraphForNNRTSubgraph(
+    const std::vector<NNRTOpRange> &nnrt_op_ranges,
+    std::vector<LiteGraph *> *sub_lite_graphs) {
+  MS_LOG(INFO) << "Start creating LiteGraph for NNRT subgraph";
+  for (const auto &op_range : nnrt_op_ranges) {
+    MS_LOG(INFO) << "Process op range: [" << op_range.begin_index_ << ", " << op_range.end_index_ << ")";
+    LiteGraph *sub_lite_graph = new (std::nothrow) LiteGraph;
+    if (sub_lite_graph == nullptr) {
+      MS_LOG(ERROR) << "Allocate LiteGraph failed";
+      return kLiteError;
+    }
+    sub_lite_graph->name_ = lite_graph_->name_;
+    sub_lite_graph->version_ = lite_graph_->version_;

-  if (ret_code != OH_NN_SUCCESS) {
-    MS_LOG(ERROR) << "NNCompilation SetDevice failed, OH_NN_ReturnCode = " << ret_code;
-    OH_NNCompilation_Destroy(&oh_nn_compilation);
-    OH_NNModel_Destroy(&oh_nnmodel);
-    return mindspore::kLiteError;
+    auto sub_graph = new (std::nothrow) LiteGraph::SubGraph;
+    if (sub_graph == nullptr) {
+      MS_LOG(ERROR) << "Allocate SubGraph failed";
+      delete sub_lite_graph;
+      return kLiteError;
+    }
+    sub_graph->name_ = lite_graph_->name_;
+    sub_lite_graph->sub_graphs_.push_back(sub_graph);
+
+    // deal with all_nodes
+    MS_LOG(INFO) << "Assemble all_nodes...";
+    int new_node_index = 0;
+    std::map<uint32_t, schema::Tensor *> in_tensor_index_map;
+    std::map<uint32_t, schema::Tensor *> out_tensor_index_map;
+    for (size_t index = op_range.begin_index_; index < op_range.end_index_; index++) {
+      LiteGraph::Node *node = new (std::nothrow) LiteGraph::Node;
+      if (node == nullptr) {
+        MS_LOG(ERROR) << "Allocate Node failed";
+        return kLiteError;
+      }
+      *node = *lite_graph_->all_nodes_[index];
+      sub_lite_graph->all_nodes_.push_back(node);
+      sub_graph->node_indices_.push_back(new_node_index++);
+
+      for (auto i : node->input_indices_) {
+        in_tensor_index_map.emplace(i, lite_graph_->all_tensors_[i]);
+      }
+      for (auto i : node->output_indices_) {
+        out_tensor_index_map.emplace(i, lite_graph_->all_tensors_[i]);
+      }
+    }
+
+    // deal with all_tensors
+    MS_LOG(INFO) << "Assemble all_tensors...";
+    std::set<schema::Tensor *> tensors;
+    for (auto iter : in_tensor_index_map) {
+      tensors.emplace(iter.second);
+    }
+    for (auto iter : out_tensor_index_map) {
+      tensors.emplace(iter.second);
+    }
+
+    uint32_t new_index = 0;
+    std::map<schema::Tensor *, uint32_t> new_tensor_maps;
+    for (auto tensor : tensors) {
+      new_tensor_maps.emplace(tensor, new_index++);
+    }
+
+    sub_lite_graph->all_tensors_ = std::vector<schema::Tensor *>(tensors.begin(), tensors.end());
+
+    // deal with every node's input/output indices
+    MS_LOG(INFO) << "Set input/output indices of each node...";
+    for (auto node : sub_lite_graph->all_nodes_) {
+      for (auto &index : node->input_indices_) {
+        index = new_tensor_maps.at(in_tensor_index_map.at(index));
+      }
+      for (auto &index : node->output_indices_) {
+        index = new_tensor_maps.at(out_tensor_index_map.at(index));
+      }
+    }
+
+    // deal with subgraph's input/output indices
+    MS_LOG(INFO) << "Set input/output indices of each subgraph...";
+    sub_graph->tensor_indices_ = std::vector<uint32_t>(tensors.size());
+    std::iota(sub_graph->tensor_indices_.begin(), sub_graph->tensor_indices_.end(), 0U);
+
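+    // A non-const tensor is selected as a subgraph input iff no node inside
+    // the range produces it, and as a subgraph output iff some node outside
+    // the range consumes it or it is a graph output.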
+    for (auto iter : in_tensor_index_map) {
+      auto new_tensor_index = new_tensor_maps.at(iter.second);
+      MS_LOG(DEBUG) << "handle input: old: " << iter.first << ", new: " << new_tensor_index << std::endl;
+      if (IsConstTensor(*iter.second)) {
+        MS_LOG(DEBUG) << "- tensor: " << new_tensor_index << " is const." << std::endl;
+        continue;
+      }
+
+      bool is_subgraph_input = true;
+      for (auto node : sub_lite_graph->all_nodes_) {
+        if (std::find(node->output_indices_.begin(), node->output_indices_.end(), new_tensor_index) !=
+            node->output_indices_.end()) {
+          is_subgraph_input = false;
+          MS_LOG(DEBUG) << "- tensor: " << new_tensor_index << " is not subgraph input." << std::endl;
+          break;
+        }
+      }
+      if (is_subgraph_input) {
+        sub_graph->input_indices_.push_back(new_tensor_index);
+        MS_LOG(DEBUG) << "- select tensor: " << new_tensor_index << " as subgraph input." << std::endl;
+      }
+    }
+
+    for (auto iter : out_tensor_index_map) {
+      auto new_tensor_index = new_tensor_maps.at(iter.second);
+      MS_LOG(DEBUG) << "handle output: old: " << iter.first << ", new: " << new_tensor_index << std::endl;
+      if (IsConstTensor(*iter.second)) {
+        MS_LOG(DEBUG) << "- tensor: " << new_tensor_index << " is const." << std::endl;
+        continue;
+      }
+
+      bool is_subgraph_output = false;
+      for (size_t i = 0; i < lite_graph_->all_nodes_.size(); i++) {
+        if ((i >= op_range.begin_index_) && (i < op_range.end_index_)) {
+          continue;
+        }
+        auto node = lite_graph_->all_nodes_[i];
+        if (std::find(node->input_indices_.begin(), node->input_indices_.end(), iter.first) !=
+            node->input_indices_.end()) { // As the input of a node which does not belong to the subgraph.
+          is_subgraph_output = true;
+          MS_LOG(DEBUG) << "- tensor: " << new_tensor_index << " is original subgraph output. node: " << node->primitive_ << std::endl;
+          break;
+        }
+      }
+      bool is_graph_output = (std::find(lite_graph_->output_indices_.begin(), lite_graph_->output_indices_.end(),
+                                        iter.first) != lite_graph_->output_indices_.end());
+      if (is_graph_output) {
+        MS_LOG(DEBUG) << "- tensor: " << new_tensor_index << " is graph output." << std::endl;
+      }
+      if (is_subgraph_output || is_graph_output) {
+        sub_graph->output_indices_.push_back(new_tensor_index);
+        MS_LOG(DEBUG) << "- select tensor: " << new_tensor_index << " as subgraph output." << std::endl;
+      }
+    }
+
+    // deal with full-graph's input/output indices
+    sub_lite_graph->input_indices_ = sub_graph->input_indices_;
+    sub_lite_graph->output_indices_ = sub_graph->output_indices_;
+    sub_lite_graphs->push_back(sub_lite_graph);
   }
+  MS_LOG(INFO) << "Finished creating LiteGraph for NNRT subgraph";
+  return kSuccess;
+}

-  ret_code = OH_NNCompilation_Build(oh_nn_compilation);
+struct TensorLocation {
+  uint32_t node_index;   // index of the node that the tensor belongs to.
+  uint32_t tensor_index; // position of the tensor within that node's input/output tensor list.
+};
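+// e.g. if tensor 5 is the second input of node 3, its location is
+// {node_index = 3, tensor_index = 1}.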

+Status NNRTDelegate::InitNNCompilation(OH_NNCompilation *nn_compilation) const {
+  auto ret_code = OH_NNCompilation_SetDevice(nn_compilation, nnrt_device_info_.device_id_);
   if (ret_code != OH_NN_SUCCESS) {
-    MS_LOG(ERROR) << "NNCompilation SetDevice failed, OH_NN_ReturnCode = " << ret_code;
-    OH_NNCompilation_Destroy(&oh_nn_compilation);
-    OH_NNModel_Destroy(&oh_nnmodel);
-    return mindspore::kLiteError;
+    MS_LOG(ERROR) << "NNCompilation set device id failed, ret: " << ret_code;
+    return kLiteError;
+  }
+  ret_code = OH_NNCompilation_SetPerformanceMode(nn_compilation,
+                                                 (OH_NN_PerformanceMode)(nnrt_device_info_.performance_mode_));
+  if ((ret_code != OH_NN_SUCCESS) && (ret_code != OH_NN_OPERATION_FORBIDDEN)) {
+    MS_LOG(ERROR) << "NNCompilation set performance mode failed, ret: " << ret_code;
+    return kLiteError;
+  }
+  ret_code = OH_NNCompilation_SetPriority(nn_compilation, (OH_NN_Priority)(nnrt_device_info_.priority_));
+  if ((ret_code != OH_NN_SUCCESS) && (ret_code != OH_NN_OPERATION_FORBIDDEN)) {
+    MS_LOG(ERROR) << "NNCompilation set priority failed, ret: " << ret_code;
+    return kLiteError;
+  }
+  ret_code = OH_NNCompilation_EnableFloat16(nn_compilation, nnrt_device_info_.enable_fp16_);
+  if ((ret_code != OH_NN_SUCCESS) && (ret_code != OH_NN_OPERATION_FORBIDDEN)) {
+    MS_LOG(ERROR) << "NNCompilation enable fp16 failed, ret: " << ret_code;
+    return kLiteError;
   }

-  MS_LOG(DEBUG) << "NNRTDelegate SetDevice success.";
-
-  OH_NNExecutor *oh_nn_executor = nullptr;
-  oh_nn_executor = OH_NNExecutor_Construct(oh_nn_compilation);
-  if (oh_nn_executor == nullptr) {
-    MS_LOG(ERROR) << "Construct NNCompilation SetDevice failed, OH_NN_ReturnCode = " << ret_code;
-    OH_NNCompilation_Destroy(&oh_nn_compilation);
-    OH_NNModel_Destroy(&oh_nnmodel);
-    return mindspore::kLiteError;
+  ret_code = OH_NNCompilation_Build(nn_compilation);
+  if (ret_code != OH_NN_SUCCESS) {
+    MS_LOG(ERROR) << "Build NNCompilation failed, ret: " << ret_code;
+    return kLiteError;
   }
-  MS_LOG(DEBUG) << "NNRTDelegate creates NNExecutor success.";
-  mindspore::Status prepare_data_ret;
-  auto nnr_model_kernel = new (std::nothrow) NNRTModelKernel(oh_nn_executor, model->inputs(), model->outputs());
-  if (nnr_model_kernel == nullptr) {
-    MS_LOG(ERROR) << "new NNRTModelKernel failed";
-    return mindspore::kLiteError;
+  return kSuccess;
+}
+
+Status NNRTDelegate::CreateNNRTSubgraphKernels(DelegateModel<schema::Primitive> *model,
+    const std::vector<LiteGraph *> &sub_lite_graphs, const std::vector<NNRTOpRange> &nnrt_subgraph_ranges,
+    std::vector<NNRTModelKernel *> *nnrt_subgraph_kernels) {
+  for (size_t i = 0; i < sub_lite_graphs.size(); i++) {
+    auto sub_lite_graph = sub_lite_graphs[i];
+
+    OH_NNModel *nn_model = OH_NNModel_Construct();
+    auto ret = OH_NNModel_BuildFromLiteGraph(nn_model, sub_lite_graph);
+    if (ret != OH_NN_SUCCESS) {
+      MS_LOG(ERROR) << "Build NNModel failed, ret: " << ret;
+      OH_NNModel_Destroy(&nn_model);
+      return kLiteError;
+    }
+
+    OH_NNCompilation *nn_compilation = OH_NNCompilation_Construct(nn_model);
+    if (nn_compilation == nullptr) {
+      MS_LOG(ERROR) << "Construct NNCompilation failed";
+      OH_NNModel_Destroy(&nn_model);
+      return kLiteError;
+    }
+    MS_LOG(DEBUG) << "NNRTDelegate creates NNCompilation success.";
+
+    auto ret_code = InitNNCompilation(nn_compilation);
+    if (ret_code != kSuccess) {
+      MS_LOG(ERROR) << "Init NNCompilation failed";
+      OH_NNCompilation_Destroy(&nn_compilation);
+      OH_NNModel_Destroy(&nn_model);
+      return kLiteError;
+    }
+
+    OH_NNExecutor *nn_executor = nullptr;
+    nn_executor = OH_NNExecutor_Construct(nn_compilation);
+    if (nn_executor == nullptr) {
+      MS_LOG(ERROR) << "Construct NNExecutor failed, ret: " << ret_code;
+      OH_NNCompilation_Destroy(&nn_compilation);
+      OH_NNModel_Destroy(&nn_model);
+      return kLiteError;
+    }
+    MS_LOG(DEBUG) << "NNRTDelegate creates NNExecutor success.";
+
+    bool format_not_support = false;
+    std::vector<MSTensor> in_tensors;
+    for (auto index : sub_lite_graph->sub_graphs_[0]->input_indices_) {
+      TensorLocation location{};
+      for (auto node_index : sub_lite_graph->sub_graphs_[0]->node_indices_) {
+        auto node = sub_lite_graph->all_nodes_[node_index];
+        auto iter = std::find(node->input_indices_.begin(), node->input_indices_.end(), index);
+        if (iter != node->input_indices_.end()) {
+          uint32_t tensor_index = iter - node->input_indices_.begin();
+          location.node_index = node_index;
+          location.tensor_index = tensor_index;
+          MS_LOG(INFO) << "Found graph input index: " << index << " is the " << tensor_index << "th input of the node " << node->primitive_;
+          break;
+        }
+      }
+      KernelIter kernel_iter = nnrt_subgraph_ranges[i].begin_iter_ + location.node_index;
+      in_tensors.push_back((*kernel_iter)->inputs()[location.tensor_index]);
+      if (in_tensors.back().format() != Format::NHWC) {
+        format_not_support = true;
+        break;
+      }
+    }
+
+    std::vector<MSTensor> out_tensors;
+    for (auto index : sub_lite_graph->sub_graphs_[0]->output_indices_) {
+      TensorLocation location{};
+      for (auto node_index : sub_lite_graph->sub_graphs_[0]->node_indices_) {
+        auto node = sub_lite_graph->all_nodes_[node_index];
+        auto iter = std::find(node->output_indices_.begin(), node->output_indices_.end(), index);
+        if (iter != node->output_indices_.end()) {
+          uint32_t tensor_index = iter - node->output_indices_.begin();
+          location.node_index = node_index;
+          location.tensor_index = tensor_index;
+          MS_LOG(INFO) << "Found graph output index: " << index << " is the " << tensor_index << "th output of the node " << node->primitive_;
+          break;
+        }
+      }
+      KernelIter kernel_iter = nnrt_subgraph_ranges[i].begin_iter_ + location.node_index;
+      out_tensors.push_back((*kernel_iter)->outputs()[location.tensor_index]);
+      if (out_tensors.back().format() != Format::NHWC) {
+        format_not_support = true;
+        break;
+      }
+    }
+    if (format_not_support) {
+      MS_LOG(WARNING) << "Unsupported in/out tensor format, skip this subgraph";
+      OH_NNCompilation_Destroy(&nn_compilation);
+      OH_NNModel_Destroy(&nn_model);
+      nnrt_subgraph_kernels->push_back(nullptr);
+      continue;
+    }
+
+    auto nnrt_model_kernel = new (std::nothrow) NNRTModelKernel(nn_executor, in_tensors, out_tensors);
+    if (nnrt_model_kernel == nullptr) {
+      MS_LOG(ERROR) << "new NNRTModelKernel failed";
+      OH_NNCompilation_Destroy(&nn_compilation);
+      OH_NNModel_Destroy(&nn_model);
+      return kLiteError;
+    }
+    OH_NNCompilation_Destroy(&nn_compilation);
+    OH_NNModel_Destroy(&nn_model);
+    nnrt_subgraph_kernels->push_back(nnrt_model_kernel);
   }
-  OH_NNCompilation_Destroy(&oh_nn_compilation);
-  OH_NNModel_Destroy(&oh_nnmodel);
-  KernelIter from = model->BeginKernelIterator();
-  KernelIter end = model->EndKernelIterator();
-  model->Replace(from, end, nnr_model_kernel);
-
-  MS_LOG(DEBUG) << "NNRTDelegate build success.";
-  return mindspore::kSuccess;
+  return kSuccess;
 }

-mindspore::Status mindspore::NNRTDelegate::Init() {
-  MS_LOG(DEBUG) << "NNRTDelegate init success.";
-  return mindspore::kSuccess;
+void NNRTDelegate::ReplaceNNRTKernelsInDelegateModel(DelegateModel<schema::Primitive> *model,
+                                                     const std::vector<NNRTOpRange> &nnrt_subgraph_ranges,
+                                                     const std::vector<NNRTModelKernel *> &nnrt_subgraph_kernels) {
+  // We perform the replacement from back to front intentionally! If we replaced from front to back, the kernel
+  // sequence would shrink and later begin_iter_/end_iter_ values might already have been invalidated.
+  for (int i = nnrt_subgraph_ranges.size() - 1; i >= 0; i--) {
+    if (nnrt_subgraph_kernels[i] == nullptr) {
+      continue;
+    }
+    auto from = nnrt_subgraph_ranges[i].begin_iter_;
+    auto end = nnrt_subgraph_ranges[i].end_iter_;
+    (void)model->Replace(from, end, nnrt_subgraph_kernels[i]);
+    MS_LOG(INFO) << "Replace nnrt subgraph kernel in range: [" << (from - model->BeginKernelIterator())
+                 << ", " << (end - model->BeginKernelIterator()) << ")";
+  }
 }
-mindspore::Status mindspore::NNRTDelegate::PrepareInputs(DelegateModel<schema::Primitive> *model,
-                                                         OH_NNExecutor *oh_nn_executor) {
+
+Status NNRTDelegate::PrepareInputs(DelegateModel<schema::Primitive> *model,
+                                   OH_NNExecutor *oh_nn_executor) {
   auto input_tensors = model->inputs();
   for (size_t i = 0; i < input_tensors.size(); i++) {
     auto tensor = input_tensors[i];
@@ -164,10 +460,10 @@ mindspore::Status mindspore::NNRTDelegate::PrepareInputs(DelegateModel<schema::P
     std::vector<double> scale;
     std::vector<int32_t> zero_point;
     if (!tmp_quant_param.empty()) {
-      quant_param = new (std::nothrow) OH_NN_QuantParam;
+      quant_param = new(std::nothrow) OH_NN_QuantParam;
       if (quant_param == nullptr) {
         MS_LOG(ERROR) << "new OH_NN_QuantParam failed.";
-        return mindspore::kLiteError;
+        return kLiteError;
       }
       for (auto qparam : tmp_quant_param) {
         bit_num.emplace_back(qparam.bit_num);
@@ -179,12 +475,12 @@ mindspore::Status mindspore::NNRTDelegate::PrepareInputs(DelegateModel<schema::P
       quant_param->scale = scale.data();
       quant_param->zeroPoint = zero_point.data();
     }
-    auto oprend = new (std::nothrow) OH_NN_Tensor;
+    auto oprend = new(std::nothrow) OH_NN_Tensor;
     if (oprend == nullptr) {
      MS_LOG(ERROR) << "new OH_NN_Tensor Failed";
-      return mindspore::kLiteError;
+      return kLiteError;
    }
-    oprend->dataType = ConvertDataType(tensor.DataType());
+    oprend->dataType = CastToNNRTDataType(tensor.DataType());
     oprend->dimensionCount = tensor_shape.size();

     std::vector<int32_t> dimensions_list;
@@ -194,14 +490,14 @@ mindspore::Status mindspore::NNRTDelegate::PrepareInputs(DelegateModel<schema::P
       } else {
        MS_LOG(ERROR) << "NNExecutor SetInput failed,tensor dimension is is too large, max dim = " << INT32_MAX
                      << ", but get dimension = " << shape;
-        return mindspore::kLiteError;
+        return kLiteError;
      }
     }
     oprend->dimensions = dimensions_list.data();
     oprend->quantParam = quant_param;
     oprend->type = OH_NN_TENSOR;
     OH_NN_ReturnCode ret_code =
-      OH_NNExecutor_SetInput(oh_nn_executor, i, oprend, tensor.MutableData(), tensor.DataSize());
     OH_NN_ReturnCode ret_code =
-      OH_NNExecutor_SetInput(oh_nn_executor, i, oprend, tensor.MutableData(), tensor.DataSize());
+        OH_NNExecutor_SetInput(oh_nn_executor, i, oprend, tensor.MutableData(), tensor.DataSize());
     delete (oprend);
 
     if (!tmp_quant_param.empty()) {
@@ -212,70 +508,37 @@ mindspore::Status mindspore::NNRTDelegate::PrepareInputs(DelegateModel<schema::P
     if (ret_code != OH_NN_SUCCESS) {
       MS_LOG(ERROR) << "NNExecutor SetInput failed, current input tensor is" << tensor.Name()
                     << "OH_NN_ReturnCode = " << ret_code;
-      return mindspore::kLiteError;
+      return kLiteError;
     }
   }
-
-  return mindspore::kSuccess;
+  return kSuccess;
 }
-OH_NN_DataType mindspore::NNRTDelegate::ConvertDataType(mindspore::DataType data_type) {
-  OH_NN_DataType oh_data_type;
-  switch (data_type) {
-    case mindspore::DataType::kTypeUnknown:
-    case mindspore::DataType::kObjectTypeString:
-    case mindspore::DataType::kObjectTypeList:
-    case mindspore::DataType::kObjectTypeTuple:
-    case mindspore::DataType::kObjectTypeTensorType:
-    case mindspore::DataType::kNumberTypeBegin:
-    case mindspore::DataType::kNumberTypeEnd:
-    case mindspore::DataType::kInvalidType:
-      oh_data_type = OH_NN_UNKNOWN;
-      break;
-    case mindspore::DataType::kNumberTypeBool:
-      oh_data_type = OH_NN_BOOL;
-      break;
-    case mindspore::DataType::kNumberTypeInt8:
-      oh_data_type = OH_NN_INT8;
-      break;
-    case mindspore::DataType::kNumberTypeInt16:
-      oh_data_type = OH_NN_INT16;
-      break;
-    case mindspore::DataType::kNumberTypeInt32:
-      oh_data_type = OH_NN_INT32;
-      break;
-    case mindspore::DataType::kNumberTypeInt64:
-      oh_data_type = OH_NN_INT64;
-      break;
-    case mindspore::DataType::kNumberTypeUInt8:
-      oh_data_type = OH_NN_UINT8;
-      break;
-    case mindspore::DataType::kNumberTypeUInt16:
-      oh_data_type = OH_NN_UINT16;
-      break;
-    case mindspore::DataType::kNumberTypeUInt32:
-      oh_data_type = OH_NN_UINT32;
-      break;
-    case mindspore::DataType::kNumberTypeUInt64:
-      oh_data_type = OH_NN_UINT64;
-      break;
-    case mindspore::DataType::kNumberTypeFloat16:
-      oh_data_type = OH_NN_FLOAT16;
-      break;
-    case mindspore::DataType::kNumberTypeFloat32:
-      oh_data_type = OH_NN_FLOAT32;
-      break;
-    case mindspore::DataType::kNumberTypeFloat64:
-      oh_data_type = OH_NN_FLOAT64;
-      break;
-    default: {
-      oh_data_type = OH_NN_UNKNOWN;
-    }
+
+OH_NN_DataType NNRTDelegate::CastToNNRTDataType(DataType data_type) {
+  const std::unordered_map<DataType, OH_NN_DataType> kDataTypeMap = {
+    {DataType::kNumberTypeBool, OH_NN_BOOL},
+    {DataType::kNumberTypeInt8, OH_NN_INT8},
+    {DataType::kNumberTypeInt16, OH_NN_INT16},
+    {DataType::kNumberTypeInt32, OH_NN_INT32},
+    {DataType::kNumberTypeInt64, OH_NN_INT64},
+    {DataType::kNumberTypeUInt8, OH_NN_UINT8},
+    {DataType::kNumberTypeUInt16, OH_NN_UINT16},
+    {DataType::kNumberTypeUInt32, OH_NN_UINT32},
+    {DataType::kNumberTypeUInt64, OH_NN_UINT64},
+    {DataType::kNumberTypeFloat16, OH_NN_FLOAT16},
+    {DataType::kNumberTypeFloat32, OH_NN_FLOAT32},
+    {DataType::kNumberTypeFloat64, OH_NN_FLOAT64},
+  };
+
+  auto iter = kDataTypeMap.find(data_type);
+  if (iter == kDataTypeMap.end()) {
+    return OH_NN_UNKNOWN;
   }
-  return oh_data_type;
+  return iter->second;
 }
 
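+// PrepareOutputs registers every model output buffer with the NNRT executor
+// (via NNExecutor SetOutput), mirroring PrepareInputs above.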
-mindspore::Status mindspore::NNRTDelegate::PrepareOutputs(DelegateModel<schema::Primitive> *model,
-                                                          OH_NNExecutor *oh_nn_executor) {
+Status NNRTDelegate::PrepareOutputs(DelegateModel<schema::Primitive> *model,
+                                    OH_NNExecutor *oh_nn_executor) {
   auto output_tensors = model->outputs();
   for (size_t i = 0; i < output_tensors.size(); i++) {
     auto tensor = output_tensors[i];
@@ -283,17 +546,17 @@ mindspore::Status mindspore::NNRTDelegate::PrepareOutputs(DelegateModel<schema::
     if (ret_code != OH_NN_SUCCESS) {
       MS_LOG(ERROR) << "NNExecutor SetOutput failed, current out tensor is" << tensor.Name()
                     << ", OH_NN_ReturnCode = " << ret_code;
-      return mindspore::kLiteError;
+      return kLiteError;
     }
   }
-  return mindspore::kSuccess;
+  return kSuccess;
 }
 
-void mindspore::NNRTDelegate::ShallowCopyLiteGraph(const mindspore::lite::LiteGraph &lite_graph) {
+void NNRTDelegate::ShallowCopyLiteGraph(const lite::LiteGraph &lite_graph) {
   Status ret;
   for (auto node : lite_graph.all_nodes_) {
     ret = lite::CheckPrimitiveSupported(static_cast<const schema::Primitive *>(node->primitive_));
-    if (ret == mindspore::kLiteError) {
+    if (ret == kLiteError) {
       MS_LOG(ERROR) << " primitive supported check failed.";
       return;
     }
@@ -302,7 +565,7 @@ void mindspore::NNRTDelegate::ShallowCopyLiteGraph(const mindspore::lite::LiteGr
   node_list.reserve(lite_graph.all_nodes_.size());
   // copy node
   for (auto node : lite_graph.all_nodes_) {
-    auto new_node = new (std::nothrow) LiteGraph::Node;
+    auto new_node = new(std::nothrow) LiteGraph::Node;
     if (new_node == nullptr) {
       MS_LOG(ERROR) << " new LiteGraph::Node failed.";
       return;
@@ -321,7 +584,7 @@ void mindspore::NNRTDelegate::ShallowCopyLiteGraph(const mindspore::lite::LiteGr
   // copy subgraph
   std::vector<LiteGraph::SubGraph *> subgraph_list;
   for (auto subgraph : lite_graph.sub_graphs_) {
-    auto new_subgraph = new (std::nothrow) LiteGraph::SubGraph;
+    auto new_subgraph = new(std::nothrow) LiteGraph::SubGraph;
    if (new_subgraph == nullptr) {
       MS_LOG(ERROR) << "new LiteGraph::Subgraph failed.";
       return;
@@ -334,30 +597,32 @@ void mindspore::NNRTDelegate::ShallowCopyLiteGraph(const mindspore::lite::LiteGr
   }
   for (auto tensor : lite_graph.all_tensors_) {
     ret = lite::CheckTensorSupported(static_cast<const schema::Tensor *>(tensor));
-    if (ret == mindspore::kLiteError) {
+    if (ret == kLiteError) {
       MS_LOG(ERROR) << "tensor supported check failed.";
       return;
     }
   }
 
-  nnrt_lite_graph = new (std::nothrow) lite::LiteGraph();
-  if (nnrt_lite_graph == nullptr) {
+  lite_graph_ = new(std::nothrow) lite::LiteGraph();
+  if (lite_graph_ == nullptr) {
     MS_LOG(ERROR) << "new LiteGraph failed.";
     return;
   }
 
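+  // Note: this is a shallow copy. The new graph shares tensors and node
+  // primitives with the original model, so the source model must outlive
+  // lite_graph_.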
-  nnrt_lite_graph->name_ = lite_graph.name_;
-  nnrt_lite_graph->version_ = lite_graph.version_;
-  nnrt_lite_graph->input_indices_ = lite_graph.input_indices_;
-  nnrt_lite_graph->output_indices_ = lite_graph.output_indices_;
-  nnrt_lite_graph->all_tensors_ = lite_graph.all_tensors_;
-  nnrt_lite_graph->all_nodes_ = node_list;
-  nnrt_lite_graph->sub_graphs_ = subgraph_list;
+  lite_graph_->name_ = lite_graph.name_;
+  lite_graph_->version_ = lite_graph.version_;
+  lite_graph_->input_indices_ = lite_graph.input_indices_;
+  lite_graph_->output_indices_ = lite_graph.output_indices_;
+  lite_graph_->all_tensors_ = lite_graph.all_tensors_;
+  lite_graph_->all_nodes_ = node_list;
+  lite_graph_->sub_graphs_ = subgraph_list;
   MS_LOG(INFO) << "ShallowCopyLiteGraph success.";
 }
 
-mindspore::NNRTDelegate::~NNRTDelegate() {
-  if (this->nnrt_lite_graph != nullptr) {
+NNRTDelegate::~NNRTDelegate() {
+  if (lite_graph_ != nullptr) {
     MS_LOG(ERROR) << "Delete NNRTDelegate.";
   }
-};
+}
+}  // namespace lite
+}  // namespace mindspore
diff --git a/mindspore/lite/src/runtime/delegate/nnrt/nnrt_delegate.h b/mindspore/lite/src/runtime/delegate/nnrt/nnrt_delegate.h
index 1be08119..48adc388 100644
--- a/mindspore/lite/src/runtime/delegate/nnrt/nnrt_delegate.h
+++ b/mindspore/lite/src/runtime/delegate/nnrt/nnrt_delegate.h
@@ -15,38 +15,64 @@
  */
 #ifndef MINDSPORE_NNR_DELEGATE_H
 #define MINDSPORE_NNR_DELEGATE_H
+
 #include <vector>
 #include <map>
 #include "include/api/delegate.h"
 #include "include/context.h"
 #include "include/model.h"
+#include "nnrt_model_kernel.h"
+#include "schema/model_generated.h"
 #include "interfaces/kits/c/neural_network_runtime_type.h"
-namespace mindspore {
+#include "interfaces/kits/c/neural_network_runtime.h"
+#include "interfaces/innerkits/c/neural_network_runtime_inner.h"
 
-using namespace lite;
+namespace mindspore {
+namespace lite {
+struct NNRTOpRange {
+  /* NNRT kernel range in DelegateModel: [begin_iter_, end_iter_) */
+  KernelIter begin_iter_;
+  KernelIter end_iter_;
+  /* NNRT node range in lite_graph_: [begin_index_, end_index_) */
+  size_t begin_index_;
+  size_t end_index_;
+};
 
 class NNRTDelegate : public Delegate {
  public:
-  NNRTDelegate() : Delegate(){};
-
+  NNRTDelegate() = default;
+  NNRTDelegate(const NNRtDeviceInfo &nnrt_device_info) : nnrt_device_info_(nnrt_device_info) {}
   ~NNRTDelegate() override;
-
-  Status Init() override;
-
+  Status Init() override { return kSuccess; }
   Status Build(DelegateModel<schema::Primitive> *model) override;
-
   void ShallowCopyLiteGraph(const lite::LiteGraph &liteGraph);
-
- protected:
-  LiteGraph *nnrt_lite_graph = nullptr;
+  static std::vector<NNRTOpRange> GetNNRTSubgraphRanges(DelegateModel<schema::Primitive> *model,
+                                                        const std::vector<bool> &op_supports);
 
  private:
-  // static LiteGraph* CreateLiteGraph(const LiteGraph &liteGraph);
+  OH_NNModel *BuildFullNNModel();
+  std::vector<bool> QueryOpSupports(OH_NNModel *nn_model);
+
+  Status CreateLiteGraphForNNRTSubgraph(
+    const std::vector<NNRTOpRange> &nnrt_op_ranges,
+    std::vector<LiteGraph *> *sub_lite_graphs);
+  Status CreateNNRTSubgraphKernels(
+    DelegateModel<schema::Primitive> *model,
+    const std::vector<LiteGraph *> &sub_lite_graphs,
+    const std::vector<NNRTOpRange> &nnrt_subgraph_ranges,
+    std::vector<NNRTModelKernel *> *nnrt_subgraph_kernels);
+  void ReplaceNNRTKernelsInDelegateModel(DelegateModel<schema::Primitive> *model,
+                                         const std::vector<NNRTOpRange> &nnrt_subgraph_ranges,
+                                         const std::vector<NNRTModelKernel *> &nnrt_subgraph_kernels);
   Status PrepareInputs(DelegateModel<schema::Primitive> *model, OH_NNExecutor *oh_nn_executor);
   Status PrepareOutputs(DelegateModel<schema::Primitive> *model, OH_NNExecutor *oh_nn_executor);
-  OH_NN_DataType ConvertDataType(mindspore::DataType data_type);
-};
+  Status InitNNCompilation(OH_NNCompilation *nn_compilation) const;
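+  // Maps a MindSpore data type to its NNRT counterpart; types NNRT cannot
+  // represent are mapped to OH_NN_UNKNOWN.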
+  static OH_NN_DataType CastToNNRTDataType(mindspore::DataType data_type);
 
+  NNRtDeviceInfo nnrt_device_info_;
+  LiteGraph *lite_graph_ = nullptr;
+};
+}  // namespace lite
 }  // namespace mindspore
 
 #endif  // MINDSPORE_NNR_DELEGATE_H
diff --git a/mindspore/lite/src/runtime/delegate/nnrt/nnrt_stub.cc b/mindspore/lite/src/runtime/delegate/nnrt/nnrt_stub.cc
index 886ac304..f2bee949 100644
--- a/mindspore/lite/src/runtime/delegate/nnrt/nnrt_stub.cc
+++ b/mindspore/lite/src/runtime/delegate/nnrt/nnrt_stub.cc
@@ -75,4 +75,25 @@ OH_NN_ReturnCode OH_NNModel_GetAvailableOperations(OH_NNModel *model,
 
 OH_NN_ReturnCode OH_NNModel_BuildFromLiteGraph(OH_NNModel *model, const void *liteGraph) {
   return OH_NN_SUCCESS;
+}
+
+OH_NN_ReturnCode OH_NNDevice_GetName(size_t deviceID, const char **name) {
+  return OH_NN_SUCCESS;
+}
+
+OH_NN_ReturnCode OH_NNDevice_GetType(size_t deviceID, OH_NN_DeviceType *deviceType) {
+  return OH_NN_SUCCESS;
+}
+
+OH_NN_ReturnCode OH_NNCompilation_SetPriority(OH_NNCompilation *compilation, OH_NN_Priority priority) {
+  return OH_NN_SUCCESS;
+}
+
+OH_NN_ReturnCode OH_NNCompilation_EnableFloat16(OH_NNCompilation *compilation, bool enableFloat16) {
+  return OH_NN_SUCCESS;
+}
+
+OH_NN_ReturnCode OH_NNCompilation_SetPerformanceMode(OH_NNCompilation *compilation,
+                                                     OH_NN_PerformanceMode performanceMode) {
+  return OH_NN_SUCCESS;
 }
\ No newline at end of file
diff --git a/mindspore/lite/src/runtime/lite_model.cc b/mindspore/lite/src/runtime/lite_model.cc
index 124b4728..cd8e68d1 100644
--- a/mindspore/lite/src/runtime/lite_model.cc
+++ b/mindspore/lite/src/runtime/lite_model.cc
@@ -91,6 +91,8 @@ int LiteModel::ConvertSubGraph(const schema::SubGraph &sub_graph) {
   if (sub_graph.name() == nullptr || sub_graph.inputIndices() == nullptr || sub_graph.outputIndices() == nullptr ||
       sub_graph.tensorIndices() == nullptr) {
     MS_LOG(ERROR) << "sub_graph is invalid";
+    MS_LOG(ERROR) << "sub_graph.name() = " << sub_graph.name() << ", sub_graph.inputIndices() = " << sub_graph.inputIndices()
+                  << ", sub_graph.outputIndices() = " << sub_graph.outputIndices() << ", sub_graph.tensorIndices() = " << sub_graph.tensorIndices();
     return RET_ERROR;
   }
 
@@ -530,6 +532,33 @@ Model *ImportFromBuffer(const char *model_buf, size_t size, bool take_buf, minds
   return model;
 }
 
+std::string LiteGraph::ToString() const {
+  std::stringstream ss;
+  ss << "all_nodes: " << all_nodes_.size() << std::endl;
+  for (size_t i = 0; i < all_nodes_.size(); i++) {
+    ss << "- node " << i << ": " << all_nodes_[i]->primitive_ << std::endl;
+    ss << "- node " << i << " input_indices_: " << all_nodes_[i]->input_indices_ << std::endl;
+    ss << "- node " << i << " output_indices_: " << all_nodes_[i]->output_indices_ << std::endl;
+  }
+  ss << "all_tensors: " << all_tensors_.size() << std::endl;
+  for (size_t i = 0; i < all_tensors_.size(); i++) {
+    ss << "- tensor " << i << ": " << all_tensors_[i] << std::endl;
+  }
+  ss << "input_indices: " << input_indices_ << std::endl;
+  ss << "output_indices: " << output_indices_ << std::endl;
+
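+  // The index lists printed below refer to positions in all_nodes_ and
+  // all_tensors_ above.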
+  ss << "subgraphs: " << std::endl;
+  int count = 0;
+  for (auto subgraph : sub_graphs_) {
+    ss << "- subgraph " << count++ << std::endl;
+    ss << "--- subgraph input " << subgraph->input_indices_ << std::endl;
+    ss << "--- subgraph output " << subgraph->output_indices_ << std::endl;
+    ss << "--- subgraph node " << subgraph->node_indices_ << std::endl;
+    ss << "--- subgraph tensor " << subgraph->tensor_indices_ << std::endl;
+  }
+  return ss.str();
+}
+
 Model *Model::Import(const char *model_buf, size_t size) { return ImportFromBuffer(model_buf, size, false); }
 
 Model *Model::Import(const char *filename) { return ImportFromPath(filename); }
diff --git a/mindspore/lite/src/runtime/lite_session.cc b/mindspore/lite/src/runtime/lite_session.cc
index eb83f444..b8808e21 100644
--- a/mindspore/lite/src/runtime/lite_session.cc
+++ b/mindspore/lite/src/runtime/lite_session.cc
@@ -834,7 +834,14 @@ int LiteSession::CreateNPUDelegate() {
 
 int LiteSession::CreateNNRTDelegate() {
#if SUPPORT_NNRT
-  delegate_ = std::make_shared<NNRTDelegate>();
+  auto iter = std::find_if(context_->device_list_.begin(), context_->device_list_.end(),
+                           [](DeviceContext &device) { return device.device_type_ == lite::DT_NNRT; });
+  if (iter == context_->device_list_.end()) {
+    MS_LOG(ERROR) << "NNRT device info not found";
+    return RET_ERROR;
+  }
+
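+  // Hand the user-configured NNRT device info over to the delegate.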
+  delegate_ = std::make_shared<NNRTDelegate>(iter->device_info_.nnrt_device_info_);
   if (delegate_ == nullptr) {
     MS_LOG(ERROR) << "New NNRT delegate failed";
     return RET_ERROR;
diff --git a/mindspore/lite/src/runtime/tensor_category.cc b/mindspore/lite/src/runtime/tensor_category.cc
index 07d14de0..9a755d81 100644
--- a/mindspore/lite/src/runtime/tensor_category.cc
+++ b/mindspore/lite/src/runtime/tensor_category.cc
@@ -30,5 +30,9 @@ Category TensorCategory(const schema::Tensor &tensor) {
   auto data_size = tensor.data() == nullptr ? 0 : tensor.data()->size();
   return TensorCategory(tensor.nodeType(), shape_num, TypeId(tensor.dataType()), data_size);
 }
+
+bool IsConstTensor(const schema::Tensor &tensor) {
+  return TensorCategory(tensor) != Category::VAR;
+}
 }  // namespace lite
 }  // namespace mindspore
diff --git a/mindspore/lite/src/runtime/tensor_category.h b/mindspore/lite/src/runtime/tensor_category.h
index 34d6cc82..8dadc344 100644
--- a/mindspore/lite/src/runtime/tensor_category.h
+++ b/mindspore/lite/src/runtime/tensor_category.h
@@ -34,6 +34,7 @@ enum Category {
 
 Category TensorCategory(const int node_type, const size_t shape_num, const TypeId data_type, const size_t data_size);
 Category TensorCategory(const schema::Tensor &tensor);
+bool IsConstTensor(const schema::Tensor &tensor);
 }  // namespace lite
 }  // namespace mindspore
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_TENSOR_CATEGORY_H_
diff --git a/mindspore/lite/test/CMakeLists.txt b/mindspore/lite/test/CMakeLists.txt
index 5fa7bea0..c7c4a3cb 100644
--- a/mindspore/lite/test/CMakeLists.txt
+++ b/mindspore/lite/test/CMakeLists.txt
@@ -28,9 +28,13 @@ file(GLOB_RECURSE TEST_UT_SRC
     ${TEST_DIR}/ut/src/runtime/kernel/arm/common/*.cc
     ${TEST_DIR}/ut/src/runtime/kernel/arm/fp32/*.cc
     ${TEST_DIR}/ut/src/runtime/kernel/arm/string/*.cc
-    ${TEST_DIR}/ut/src/api/context_c_test.cc
-    ${TEST_DIR}/ut/src/api/tensor_c_test.cc
+#    ${TEST_DIR}/ut/src/api/context_c_test.cc
+#    ${TEST_DIR}/ut/src/api/tensor_c_test.cc
     )
+if(MSLITE_ENABLE_NNRT)
+    list(APPEND TEST_UT_SRC ${TEST_DIR}/ut/src/runtime/nnrt_delegate/nnrt_delegate_tests.cc)
+endif()
+
 if(MSLITE_ENABLE_SERVER_INFERENCE)
     list(APPEND TEST_UT_SRC ${TEST_DIR}/ut/src/api/model_parallel_runner_test.cc)
 endif()
@@ -85,7 +89,7 @@ endif()
 
 if(MSLITE_ENABLE_INT8)
     file(GLOB_RECURSE TEST_INT8_UT_SRC
-        ${TEST_DIR}/ut/src/runtime/kernel/arm/int8/*.cc
+#        ${TEST_DIR}/ut/src/runtime/kernel/arm/int8/*.cc
         ${TEST_DIR}/ut/nnacl/int8/*.cc
         )
     list(APPEND TEST_UT_SRC ${TEST_INT8_UT_SRC})
diff --git a/mindspore/lite/test/ut/src/runtime/nnrt_delegate/nnrt_delegate_tests.cc b/mindspore/lite/test/ut/src/runtime/nnrt_delegate/nnrt_delegate_tests.cc
new file mode 100644
index 00000000..e1ea3968
--- /dev/null
+++ b/mindspore/lite/test/ut/src/runtime/nnrt_delegate/nnrt_delegate_tests.cc
@@ -0,0 +1,59 @@
+/**
+ * Copyright 2023 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
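+// GetNNRTSubgraphRanges is expected to group consecutive NNRT-supported ops
+// into maximal half-open [begin, end) ranges, e.g. support flags
+// {true, false, false, true, true} yield {[0, 1), [3, 5)} (test case 1).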
+#include <memory>
+#include "gtest/gtest.h"
+#include "runtime/delegate/nnrt/nnrt_delegate.h"
+
+using namespace mindspore;
+using namespace mindspore::lite;
+
+void AssertOpRange(const std::vector<NNRTOpRange> &op_ranges, const std::vector<std::vector<size_t>> &expect) {
+  ASSERT_EQ(op_ranges.size(), expect.size());
+  for (size_t i = 0; i < op_ranges.size(); i++) {
+    ASSERT_EQ(op_ranges[i].begin_index_, expect[i][0]);
+    ASSERT_EQ(op_ranges[i].end_index_, expect[i][1]);
+  }
+}
+
+TEST(NNRTDelegateTest, GetNNRTSubgraphRanges) {
+  // Prepare a DelegateModel over 5 placeholder kernels; nullptr kernels are
+  // fine here since only the op_supports flags drive the expected ranges.
+  std::vector<kernel::Kernel *> kernels(5, nullptr);
+  std::vector<MSTensor> inputs = {};
+  std::vector<MSTensor> outputs = {};
+  std::unique_ptr<DelegateModel<schema::Primitive>> model;
+  model.reset(new DelegateModel<schema::Primitive>(&kernels, inputs, outputs, {}, SCHEMA_CUR));
+
+  std::cout << "Test case 1, expect: {[0, 1), [3, 5)}" << std::endl;
+  auto op_ranges01 = NNRTDelegate::GetNNRTSubgraphRanges(model.get(), {true, false, false, true, true});
+  AssertOpRange(op_ranges01, {{0, 1}, {3, 5}});
+
+  std::cout << "Test case 2, expect: {}" << std::endl;
+  auto op_ranges02 = NNRTDelegate::GetNNRTSubgraphRanges(model.get(), {false, false, false, false, false});
+  AssertOpRange(op_ranges02, {});
+
+  std::cout << "Test case 3, expect: {[0, 5)}" << std::endl;
+  auto op_ranges03 = NNRTDelegate::GetNNRTSubgraphRanges(model.get(), {true, true, true, true, true});
+  AssertOpRange(op_ranges03, {{0, 5}});
+
+  std::cout << "Test case 4, expect: {[0, 1), [2, 3), [4, 5)}" << std::endl;
+  auto op_ranges04 = NNRTDelegate::GetNNRTSubgraphRanges(model.get(), {true, false, true, false, true});
+  AssertOpRange(op_ranges04, {{0, 1}, {2, 3}, {4, 5}});
+
+  std::cout << "Test case 5, expect: {[1, 2), [3, 4)}" << std::endl;
+  auto op_ranges05 = NNRTDelegate::GetNNRTSubgraphRanges(model.get(), {false, true, false, true, false});
+  AssertOpRange(op_ranges05, {{1, 2}, {3, 4}});
+}
\ No newline at end of file
-- 
2.34.1
