From d8ae073cf0e9f10fcbd129a145927492f65edcca Mon Sep 17 00:00:00 2001 From: Zhu Guodong Date: Tue, 30 May 2023 12:17:40 +0800 Subject: [PATCH] auto-apply 0010-nnrt-delegate-supports-heterogeneous-predition.patch --- include/api/context.h | 40 ++ include/c_api/context_c.h | 85 +++ include/c_api/types_c.h | 36 + include/sdk_api/context.h | 85 +++ include/sdk_api/types.h | 38 +- mindspore/lite/include/context.h | 4 + mindspore/lite/include/model.h | 4 + mindspore/lite/src/runtime/c_api/context_c.cc | 196 ++++++ mindspore/lite/src/runtime/c_api/context_c.h | 3 + .../lite/src/runtime/c_api/type_c_private.h | 40 ++ mindspore/lite/src/runtime/cxx_api/context.cc | 68 ++ .../lite/src/runtime/cxx_api/converters.cc | 15 +- .../lite/src/runtime/cxx_api/converters.h | 2 +- .../runtime/delegate/nnrt/nnrt_delegate.cc | 633 +++++++++++++----- .../src/runtime/delegate/nnrt/nnrt_delegate.h | 54 +- .../src/runtime/delegate/nnrt/nnrt_stub.cc | 21 + mindspore/lite/src/runtime/lite_model.cc | 29 + mindspore/lite/src/runtime/lite_session.cc | 9 +- mindspore/lite/src/runtime/tensor_category.cc | 4 + mindspore/lite/src/runtime/tensor_category.h | 1 + mindspore/lite/test/CMakeLists.txt | 10 +- .../nnrt_delegate/nnrt_delegate_tests.cc | 59 ++ 22 files changed, 1229 insertions(+), 207 deletions(-) create mode 100644 mindspore/lite/src/runtime/c_api/type_c_private.h create mode 100644 mindspore/lite/test/ut/src/runtime/nnrt_delegate/nnrt_delegate_tests.cc diff --git a/include/api/context.h b/include/api/context.h index d88b9d44..4c25aa10 100644 --- a/include/api/context.h +++ b/include/api/context.h @@ -519,6 +519,46 @@ class MS_API NNRTDeviceInfo : public DeviceInfoContext { /// /// \return Type of this DeviceInfoContext. enum DeviceType GetDeviceType() const override { return DeviceType::kNNRt; }; + + /// \brief Set device id. + /// + /// \param[in] device_id The device id. + void SetDeviceID(size_t device_id); + + /// \brief Get the device id. + /// + /// \return The device id. + size_t GetDeviceID() const; + + /// \brief Set performance mode. + /// + /// \param[in] performance_mode The performance mode. + void SetPerformanceMode(int performance_mode); + + /// \brief Get performance mode. + /// + /// \return The priority. + int GetPerformanceMode() const; + + /// \brief Set priority. + /// + /// \param[in] priority The priority. + void SetPriority(int priority); + + /// \brief Get priority. + /// + /// \return The priority. + int GetPriority() const; + + /// \brief Set enables to perform the float16 inference + /// + /// \param[in] is_fp16 Enable float16 inference or not. + void SetEnableFP16(bool is_fp16); + + /// \brief Get enables to perform the float16 inference + /// + /// \return Whether enable float16 inference. + bool GetEnableFP16() const; }; } // namespace mindspore #endif // MINDSPORE_INCLUDE_API_CONTEXT_H diff --git a/include/c_api/context_c.h b/include/c_api/context_c.h index 53839e80..09220f20 100644 --- a/include/c_api/context_c.h +++ b/include/c_api/context_c.h @@ -173,6 +173,91 @@ OH_AI_API void OH_AI_DeviceInfoSetFrequency(OH_AI_DeviceInfoHandle device_info, /// \return NPU frequency OH_AI_API int OH_AI_DeviceInfoGetFrequency(const OH_AI_DeviceInfoHandle device_info); +/// \brief Obtain the all device descriptions in NNRT. +/// +/// \param[out] num Number of NNRT device description. +/// +/// \return NNRT device description array. +OH_AI_API NNRTDeviceDesc *OH_AI_GetAllNNRTDeviceDescs(size_t *num); + +/// \brief Destroy the NNRT device descriptions returned by OH_AI_GetAllNNRTDeviceDescs(). 
+/// +/// \param[in] desc NNRT device description array. +OH_AI_API void OH_AI_DestroyAllNNRTDeviceDescs(NNRTDeviceDesc **desc); + +/// \brief Obtain the device id in NNRT device description. +/// +/// \param[in] desc pointer to the NNRT device description instance. +/// +/// \return NNRT device id. +OH_AI_API size_t OH_AI_GetDeviceIdFromNNRTDeviceDesc(const NNRTDeviceDesc *desc); + +/// \brief Obtain the device name in NNRT device description. +/// +/// \param[in] desc pointer to the NNRT device description instance. +/// +/// \return NNRT device name. +OH_AI_API const char *OH_AI_GetNameFromNNRTDeviceDesc(const NNRTDeviceDesc *desc); + +/// \brief Obtain the device type in NNRT device description. +/// +/// \param[in] desc pointer to the NNRT device description instance. +/// +/// \return NNRT device type. +OH_AI_API OH_AI_NNRTDeviceType OH_AI_GetTypeFromNNRTDeviceDesc(const NNRTDeviceDesc *desc); + +/// \brief Create the NNRT device info by exactly matching the specific device name. +/// +/// \param[in] name NNRt device name. +/// +/// \return Device info object handle. +OH_AI_API OH_AI_DeviceInfoHandle OH_AI_CreateNNRTDeviceInfoByName(const char *name); + +/// \brief Create the NNRT device info by finding the first device with the specific device type. +/// +/// \param[in] name NNRt device type. +/// +/// \return Device info object handle. +OH_AI_API OH_AI_DeviceInfoHandle OH_AI_CreateNNRTDeviceInfoByType(OH_AI_NNRTDeviceType type); + +/// \brief Set the NNRT device id, Only valid for NNRT. +/// +/// \param[in] device_info Device info object handle. +/// \param[in] device_id NNRT device id. +OH_AI_API void OH_AI_DeviceInfoSetDeviceId(OH_AI_DeviceInfoHandle device_info, size_t device_id); + +/// \brief Obtain the NNRT device id, Only valid for NNRT. +/// +/// \param[in] device_info Device info object handle. +/// +/// \return NNRT device id. +OH_AI_API size_t OH_AI_DeviceInfoGetDeviceId(const OH_AI_DeviceInfoHandle device_info); + +/// \brief Set the NNRT performance mode, Only valid for NNRT. +/// +/// \param[in] device_info Device info object handle. +/// \param[in] device_id NNRT performance mode. +OH_AI_API void OH_AI_DeviceInfoSetPerformanceMode(OH_AI_DeviceInfoHandle device_info, OH_AI_PerformanceMode mode); + +/// \brief Obtain the NNRT performance mode, Only valid for NNRT. +/// +/// \param[in] device_info Device info object handle. +/// +/// \return NNRT performance mode. +OH_AI_API OH_AI_PerformanceMode OH_AI_DeviceInfoGetPerformanceMode(const OH_AI_DeviceInfoHandle device_info); + +/// \brief Set the NNRT priority, Only valid for NNRT. +/// +/// \param[in] device_info Device info object handle. +/// \param[in] device_id NNRT priority. +OH_AI_API void OH_AI_DeviceInfoSetPriority(OH_AI_DeviceInfoHandle device_info, OH_AI_Priority priority); + +/// \brief Obtain the NNRT priority, Only valid for NNRT. +/// +/// \param[in] device_info Device info object handle. +/// +/// \return NNRT priority. 
+OH_AI_API OH_AI_Priority OH_AI_DeviceInfoGetPriority(const OH_AI_DeviceInfoHandle device_info); #ifdef __cplusplus } #endif diff --git a/include/c_api/types_c.h b/include/c_api/types_c.h index fdf91f5a..d612eb97 100644 --- a/include/c_api/types_c.h +++ b/include/c_api/types_c.h @@ -44,6 +44,42 @@ typedef enum OH_AI_DeviceType { OH_AI_DEVICETYPE_INVALID = 100, } OH_AI_DeviceType; +typedef enum OH_AI_NNRTDeviceType { + /** Devices that are not CPU, GPU, or dedicated accelerator */ + OH_AI_NNRTDEVICE_OTHERS = 0, + /** CPU device */ + OH_AI_NNRTDEVICE_CPU = 1, + /** GPU device */ + OH_AI_NNRTDEVICE_GPU = 2, + /** Dedicated hardware accelerator */ + OH_AI_NNRTDEVICE_ACCELERATOR = 3, +} OH_AI_NNRTDeviceType; + +typedef enum OH_AI_PerformanceMode { + /** No performance mode preference */ + OH_AI_PERFORMANCE_NONE = 0, + /** Low power consumption mode*/ + OH_AI_PERFORMANCE_LOW = 1, + /** Medium performance mode */ + OH_AI_PERFORMANCE_MEDIUM = 2, + /** High performance mode */ + OH_AI_PERFORMANCE_HIGH = 3, + /** Ultimate performance mode */ + OH_AI_PERFORMANCE_EXTREME = 4 +} OH_AI_PerformanceMode; + +typedef enum OH_AI_Priority { + /** No priority preference */ + OH_AI_PRIORITY_NONE = 0, + /** Low priority */ + OH_AI_PRIORITY_LOW = 1, + /** Medium priority */ + OH_AI_PRIORITY_MEDIUM = 2, + /** High priority */ + OH_AI_PRIORITY_HIGH = 3 +} OH_AI_Priority; + +typedef struct NNRTDeviceDesc NNRTDeviceDesc; #ifdef __cplusplus } #endif diff --git a/include/sdk_api/context.h b/include/sdk_api/context.h index 5bfc9279..bf0ff0a6 100644 --- a/include/sdk_api/context.h +++ b/include/sdk_api/context.h @@ -174,6 +174,91 @@ OH_AI_API void OH_AI_DeviceInfoSetFrequency(OH_AI_DeviceInfoHandle device_info, /// \return NPU frequency OH_AI_API int OH_AI_DeviceInfoGetFrequency(const OH_AI_DeviceInfoHandle device_info); +/// \brief Obtain the all device descriptions in NNRT. +/// +/// \param[out] num Number of NNRT device description. +/// +/// \return NNRT device description array. +OH_AI_API NNRTDeviceDesc *OH_AI_GetAllNNRTDeviceDescs(size_t *num); + +/// \brief Destroy the NNRT device descriptions returned by OH_AI_NNRTGetAllDeviceDescs(). +/// +/// \param[in] desc NNRT device description array. +OH_AI_API void OH_AI_DestroyAllNNRTDeviceDescs(NNRTDeviceDesc **desc); + +/// \brief Obtain the device id in NNRT device description. +/// +/// \param[in] desc pointer to the NNRT device description instance. +/// +/// \return NNRT device id. +OH_AI_API size_t OH_AI_GetDeviceIdFromNNRTDeviceDesc(const NNRTDeviceDesc *desc); + +/// \brief Obtain the device name in NNRT device description. +/// +/// \param[in] desc pointer to the NNRT device description instance. +/// +/// \return NNRT device name. +OH_AI_API const char *OH_AI_GetNameFromNNRTDeviceDesc(const NNRTDeviceDesc *desc); + +/// \brief Obtain the device type in NNRT device description. +/// +/// \param[in] desc pointer to the NNRT device description instance. +/// +/// \return NNRT device type. +OH_AI_API OH_AI_NNRTDeviceType OH_AI_GetTypeFromNNRTDeviceDesc(const NNRTDeviceDesc *desc); + +/// \brief Create the NNRT device info by exactly matching the specific device name. +/// +/// \param[in] name NNRt device name. +/// +/// \return Device info object handle. +OH_AI_API OH_AI_DeviceInfoHandle OH_AI_CreateNNRTDeviceInfoByName(const char *name); + +/// \brief Create the NNRT device info by finding the first device with the specific device type. +/// +/// \param[in] name NNRt device type. +/// +/// \return Device info object handle. 
+OH_AI_API OH_AI_DeviceInfoHandle OH_AI_CreateNNRTDeviceInfoByType(OH_AI_NNRTDeviceType type); + +/// \brief Set the NNRT device id, Only valid for NNRT. +/// +/// \param[in] device_info Device info object handle. +/// \param[in] device_id NNRT device id. +OH_AI_API void OH_AI_DeviceInfoSetDeviceId(OH_AI_DeviceInfoHandle device_info, size_t device_id); + +/// \brief Obtain the NNRT device id, Only valid for NNRT. +/// +/// \param[in] device_info Device info object handle. +/// +/// \return NNRT device id. +OH_AI_API size_t OH_AI_DeviceInfoGetDeviceId(const OH_AI_DeviceInfoHandle device_info); + +/// \brief Set the NNRT performance mode, Only valid for NNRT. +/// +/// \param[in] device_info Device info object handle. +/// \param[in] device_id NNRT performance mode. +OH_AI_API void OH_AI_DeviceInfoSetPerformanceMode(OH_AI_DeviceInfoHandle device_info, OH_AI_PerformanceMode mode); + +/// \brief Obtain the NNRT performance mode, Only valid for NNRT. +/// +/// \param[in] device_info Device info object handle. +/// +/// \return NNRT performance mode. +OH_AI_API OH_AI_PerformanceMode OH_AI_DeviceInfoGetPerformanceMode(const OH_AI_DeviceInfoHandle device_info); + +/// \brief Set the NNRT priority, Only valid for NNRT. +/// +/// \param[in] device_info Device info object handle. +/// \param[in] device_id NNRT priority. +OH_AI_API void OH_AI_DeviceInfoSetPriority(OH_AI_DeviceInfoHandle device_info, OH_AI_Priority priority); + +/// \brief Obtain the NNRT priority, Only valid for NNRT. +/// +/// \param[in] device_info Device info object handle. +/// +/// \return NNRT priority. +OH_AI_API OH_AI_Priority OH_AI_DeviceInfoGetPriority(const OH_AI_DeviceInfoHandle device_info); #ifdef __cplusplus } #endif diff --git a/include/sdk_api/types.h b/include/sdk_api/types.h index a39c6daa..d38660b0 100644 --- a/include/sdk_api/types.h +++ b/include/sdk_api/types.h @@ -40,10 +40,46 @@ typedef enum OH_AI_DeviceType { OH_AI_DEVICETYPE_KIRIN_NPU, // add new type here // ohos-only device range: [60, 80) - OH_AI_DeviceType_NNRT = 60, + OH_AI_DEVICETYPE_NNRT = 60, OH_AI_DEVICETYPE_INVALID = 100, } OH_AI_DeviceType; +typedef enum OH_AI_NNRTDeviceType { + /** Devices that are not CPU, GPU, or dedicated accelerator */ + OH_AI_NNRTDEVICE_OTHERS = 0, + /** CPU device */ + OH_AI_NNRTDEVICE_CPU = 1, + /** GPU device */ + OH_AI_NNRTDEVICE_GPU = 2, + /** Dedicated hardware accelerator */ + OH_AI_NNRTDEVICE_ACCELERATOR = 3, +} OH_AI_NNRTDeviceType; + +typedef enum OH_AI_PerformanceMode { + /** No performance mode preference */ + OH_AI_PERFORMANCE_NONE = 0, + /** Low power consumption mode*/ + OH_AI_PERFORMANCE_LOW = 1, + /** Medium performance mode */ + OH_AI_PERFORMANCE_MEDIUM = 2, + /** High performance mode */ + OH_AI_PERFORMANCE_HIGH = 3, + /** Ultimate performance mode */ + OH_AI_PERFORMANCE_EXTREME = 4 +} OH_AI_PerformanceMode; + +typedef enum OH_AI_Priority { + /** No priority preference */ + OH_AI_PRIORITY_NONE = 0, + /** Low priority */ + OH_AI_PRIORITY_LOW = 1, + /** Medium priority */ + OH_AI_PRIORITY_MEDIUM = 2, + /** High priority */ + OH_AI_PRIORITY_HIGH = 3 +} OH_AI_Priority; + +typedef struct NNRTDeviceDesc NNRTDeviceDesc; #ifdef __cplusplus } #endif diff --git a/mindspore/lite/include/context.h b/mindspore/lite/include/context.h index 22bd24df..7e1e06f3 100644 --- a/mindspore/lite/include/context.h +++ b/mindspore/lite/include/context.h @@ -52,6 +52,10 @@ typedef struct AscendDeviceInfo { } AscendDeviceInfo; typedef struct NNRtDeviceInfo { + uint32_t device_id_ = 0; + int priority_ = 0; + int performance_mode_ = 0; 
+ bool enable_fp16_ = false; } NNRtDeviceInfo; /// \brief DeviceInfo defined for backend's configuration information. diff --git a/mindspore/lite/include/model.h b/mindspore/lite/include/model.h index 44cba37b..a54904c8 100644 --- a/mindspore/lite/include/model.h +++ b/mindspore/lite/include/model.h @@ -24,6 +24,7 @@ namespace mindspore { namespace schema { struct Tensor; } // namespace schema + namespace lite { typedef enum { ModelType_MSLite, ModelType_MindIR } LiteModelType; @@ -61,7 +62,10 @@ struct LiteGraph { bool model_obfuscated_ = false; std::vector deobf_prims_; #endif + + std::string ToString() const; }; + struct Model { LiteGraph graph_; char *buf = nullptr; diff --git a/mindspore/lite/src/runtime/c_api/context_c.cc b/mindspore/lite/src/runtime/c_api/context_c.cc index d030e931..2a0a6d06 100644 --- a/mindspore/lite/src/runtime/c_api/context_c.cc +++ b/mindspore/lite/src/runtime/c_api/context_c.cc @@ -14,8 +14,13 @@ * limitations under the License. */ #include "include/c_api/context_c.h" +#include #include "src/runtime/c_api/context_c.h" +#include "src/runtime/c_api/type_c_private.h" #include "src/common/log_adapter.h" +#ifdef SUPPORT_NNRT +#include "interfaces/kits/c/neural_network_runtime.h" +#endif // ================ Context ================ OH_AI_ContextHandle OH_AI_ContextCreate() { @@ -238,3 +243,194 @@ int OH_AI_DeviceInfoGetFrequency(const OH_AI_DeviceInfoHandle device_info) { // return -1; } } + +NNRTDeviceDesc *OH_AI_GetAllNNRTDeviceDescs(size_t *num) { + if (num == nullptr) { + MS_LOG(ERROR) << "Input num is null"; + return nullptr; + } +#ifdef SUPPORT_NNRT + *num = 0; + + const size_t *all_device_ids; + uint32_t device_count; + auto ret = OH_NNDevice_GetAllDevicesID(&all_device_ids, &device_count); + if ((ret != OH_NN_SUCCESS) || (device_count == 0)) { + MS_LOG(ERROR) << "NNRT get all device id failed, ret: " << ret; + return nullptr; + } + + NNRTDeviceDesc *desc = (NNRTDeviceDesc *)malloc(sizeof(NNRTDeviceDesc) * device_count); + if (desc == nullptr) { + MS_LOG(ERROR) << "NNRT allocate desc failed"; + return nullptr; + } + + for (uint32_t i = 0; i < device_count; i++) { + desc[i].device_id = all_device_ids[i]; + OH_NN_DeviceType type; + (void)OH_NNDevice_GetType(all_device_ids[i], &type); + desc[i].device_type = static_cast(type); + + const char *name = nullptr; + (void)OH_NNDevice_GetName(all_device_ids[i], &name); + desc[i].device_name[127] = '\0'; + strncpy(desc[i].device_name, name, 127); + } + *num = device_count; + return desc; +#else + return nullptr; +#endif +} + +void OH_AI_DestroyAllNNRTDeviceDescs(NNRTDeviceDesc **desc) { + if (desc == nullptr) { + MS_LOG(WARNING) << "desc is null"; + return; + } + free(*desc); + *desc = nullptr; +} + +size_t OH_AI_GetDeviceIdFromNNRTDeviceDesc(const NNRTDeviceDesc *desc) { + if (desc == nullptr) { + MS_LOG(ERROR) << "NNRT desc is null"; + return 0; + } + return desc->device_id; +} + +const char *OH_AI_GetNameFromNNRTDeviceDesc(const NNRTDeviceDesc *desc) { + if (desc == nullptr) { + MS_LOG(ERROR) << "NNRT desc is null"; + return nullptr; + } + return desc->device_name; +} + +OH_AI_NNRTDeviceType OH_AI_GetTypeFromNNRTDeviceDesc(const NNRTDeviceDesc *desc) { + if (desc == nullptr) { + MS_LOG(ERROR) << "NNRT desc is null"; + return OH_AI_NNRTDeviceType::OH_AI_NNRTDEVICE_OTHERS; + } + return desc->device_type; +} + +OH_AI_DeviceInfoHandle OH_AI_CreateNNRTDeviceInfoByName(const char *name) { + size_t num = 0; + NNRTDeviceDesc *desc = OH_AI_GetAllNNRTDeviceDescs(&num); + if (desc == nullptr) { + MS_LOG(ERROR) << "Get all 
device desc failed"; + return nullptr; + } + + OH_AI_DeviceInfoHandle handle = nullptr; + for (size_t i = 0; i < num; i++) { + if (strncmp(desc[i].device_name, name, NNRT_DEVICE_NAME_MAX - 1) == 0) { + handle = OH_AI_DeviceInfoCreate(OH_AI_DEVICETYPE_NNRT); + OH_AI_DeviceInfoSetDeviceId(handle, desc[i].device_id); + break; + } + } + OH_AI_DestroyAllNNRTDeviceDescs(&desc); + return handle; +} + +OH_AI_DeviceInfoHandle OH_AI_CreateNNRTDeviceInfoByType(OH_AI_NNRTDeviceType type) { + size_t num = 0; + NNRTDeviceDesc *desc = OH_AI_GetAllNNRTDeviceDescs(&num); + if (desc == nullptr) { + MS_LOG(ERROR) << "Get all device desc failed"; + return nullptr; + } + + OH_AI_DeviceInfoHandle handle = nullptr; + for (size_t i = 0; i < num; i++) { + if (desc[i].device_type == type) { + handle = OH_AI_DeviceInfoCreate(OH_AI_DEVICETYPE_NNRT); + OH_AI_DeviceInfoSetDeviceId(handle, desc[i].device_id); + break; + } + } + OH_AI_DestroyAllNNRTDeviceDescs(&desc); + return handle; +} + +void OH_AI_DeviceInfoSetDeviceId(OH_AI_DeviceInfoHandle device_info, size_t device_id) { + if (device_info == nullptr) { + MS_LOG(ERROR) << "device info is null"; + return; + } + if (OH_AI_DeviceInfoGetDeviceType(device_info) != OH_AI_DEVICETYPE_NNRT) { + MS_LOG(ERROR) << "Set device_id of non-NNRT device is not allowable, ignored"; + return; + } + auto impl = reinterpret_cast(device_info); + impl->device_id = device_id; +} + +size_t OH_AI_DeviceInfoGetDeviceId(const OH_AI_DeviceInfoHandle device_info) { + if (device_info == nullptr) { + MS_LOG(ERROR) << "device info is null"; + return 0; + } + if (OH_AI_DeviceInfoGetDeviceType(device_info) != OH_AI_DEVICETYPE_NNRT) { + MS_LOG(ERROR) << "Get device_id of non-NNRT device is not allowable, ignored"; + return 0; + } + auto impl = reinterpret_cast(device_info); + return impl->device_id; +} + +void OH_AI_DeviceInfoSetPerformanceMode(OH_AI_DeviceInfoHandle device_info, OH_AI_PerformanceMode mode) { + if (device_info == nullptr) { + MS_LOG(ERROR) << "device info is null"; + return; + } + if (OH_AI_DeviceInfoGetDeviceType(device_info) != OH_AI_DEVICETYPE_NNRT) { + MS_LOG(ERROR) << "Set performance_mode of non-NNRT device is not allowable, ignored"; + return; + } + auto impl = reinterpret_cast(device_info); + impl->performance_mode = mode; +} + +OH_AI_PerformanceMode OH_AI_DeviceInfoGetPerformanceMode(const OH_AI_DeviceInfoHandle device_info) { + if (device_info == nullptr) { + MS_LOG(ERROR) << "device info is null"; + return OH_AI_PERFORMANCE_NONE; + } + if (OH_AI_DeviceInfoGetDeviceType(device_info) != OH_AI_DEVICETYPE_NNRT) { + MS_LOG(ERROR) << "Get performance_mode of non-NNRT device is not allowable, ignored"; + return OH_AI_PERFORMANCE_NONE; + } + auto impl = reinterpret_cast(device_info); + return impl->performance_mode; +} + +void OH_AI_DeviceInfoSetPriority(OH_AI_DeviceInfoHandle device_info, OH_AI_Priority priority) { + if (device_info == nullptr) { + MS_LOG(ERROR) << "device info is null"; + return; + } + if (OH_AI_DeviceInfoGetDeviceType(device_info) != OH_AI_DEVICETYPE_NNRT) { + MS_LOG(ERROR) << "Set priority of non-NNRT device is not allowable, ignored"; + return; + } + auto impl = reinterpret_cast(device_info); + impl->priority = priority; +} + +OH_AI_Priority OH_AI_DeviceInfoGetPriority(const OH_AI_DeviceInfoHandle device_info) { + if (device_info == nullptr) { + MS_LOG(ERROR) << "device info is null"; + return OH_AI_PRIORITY_NONE; + } + if (OH_AI_DeviceInfoGetDeviceType(device_info) != OH_AI_DEVICETYPE_NNRT) { + MS_LOG(ERROR) << "Get priority of non-NNRT device is not 
allowable, ignored"; + return OH_AI_PRIORITY_NONE; + } + auto impl = reinterpret_cast(device_info); + return impl->priority; +} diff --git a/mindspore/lite/src/runtime/c_api/context_c.h b/mindspore/lite/src/runtime/c_api/context_c.h index 7b9db3ea..0fb2f3e7 100644 --- a/mindspore/lite/src/runtime/c_api/context_c.h +++ b/mindspore/lite/src/runtime/c_api/context_c.h @@ -29,6 +29,9 @@ typedef struct DeviceInfoC { OH_AI_DeviceType device_type; bool enable_fp16 = false; int frequency = 3; + size_t device_id = 0; + OH_AI_PerformanceMode performance_mode = OH_AI_PERFORMANCE_NONE; + OH_AI_Priority priority = OH_AI_PRIORITY_NONE; std::string provider; std::string provider_device; std::shared_ptr allocator = nullptr; diff --git a/mindspore/lite/src/runtime/c_api/type_c_private.h b/mindspore/lite/src/runtime/c_api/type_c_private.h new file mode 100644 index 00000000..b3b64748 --- /dev/null +++ b/mindspore/lite/src/runtime/c_api/type_c_private.h @@ -0,0 +1,40 @@ +/** + * Copyright 2023 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_LITE_SRC_RUNTIME_C_API_TYPE_C_PRIVATE_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_C_API_TYPE_C_PRIVATE_H_ + +#include +#include +#include +#include +#include "include/c_api/types_c.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define NNRT_DEVICE_NAME_MAX (128) + +struct NNRTDeviceDesc { + size_t device_id; + OH_AI_NNRTDeviceType device_type; + char device_name[NNRT_DEVICE_NAME_MAX]; +}; + +#ifdef __cplusplus +} +#endif +#endif // MINDSPORE_LITE_SRC_RUNTIME_C_API_TYPE_C_PRIVATE_H_ diff --git a/mindspore/lite/src/runtime/cxx_api/context.cc b/mindspore/lite/src/runtime/cxx_api/context.cc index d550975b..6ac926b9 100644 --- a/mindspore/lite/src/runtime/cxx_api/context.cc +++ b/mindspore/lite/src/runtime/cxx_api/context.cc @@ -48,6 +48,10 @@ constexpr auto KModelOptionAscendFusionSwitchCfgPath = "mindspore.option.ascend. 
constexpr auto kModelOptionAscendDynamicBatchSize = "mindspore.option.ascend.dynamic_batch_size"; constexpr auto kModelOptionAscendDynamicImageSize = "mindspore.option.ascend.dynamic_image_size"; constexpr auto kModelOptionAscendBufferOptimize = "mindspore.option.ascend.buffer_optimize"; +constexpr auto kModelOptionNNRTDeviceID = "mindspore.option.nnrt.device_id"; +constexpr auto kModelOptionNNRTPerformanceMode = "mindspore.option.nnrt.performance_mode"; +constexpr auto kModelOptionNNRTPriority = "mindspore.option.nnrt.priority"; +constexpr auto kModelOptionNNRTEnableFP16 = "mindspore.option.nnrt.enable_fp16"; Context::Context() : data_(std::make_shared()) {} @@ -586,4 +590,68 @@ std::vector AscendDeviceInfo::GetBufferOptimizeModeChar() const { const std::string &ref = GetValue(data_, kModelOptionAscendBufferOptimize); return StringToChar(ref); } + +void NNRTDeviceInfo::SetDeviceID(size_t device_id) { + if (data_ == nullptr) { + MS_LOG(ERROR) << "Invalid context."; + return; + } + data_->params[kModelOptionNNRTDeviceID] = device_id; +} + +size_t NNRTDeviceInfo::GetDeviceID() const { + if (data_ == nullptr) { + MS_LOG(ERROR) << "Invalid context."; + return 0; + } + return GetValue(data_, kModelOptionNNRTDeviceID); +} + +void NNRTDeviceInfo::SetPerformanceMode(int performance_mode) { + if (data_ == nullptr) { + MS_LOG(ERROR) << "Invalid context."; + return; + } + data_->params[kModelOptionNNRTPerformanceMode] = performance_mode; +} + +int NNRTDeviceInfo::GetPerformanceMode() const { + if (data_ == nullptr) { + MS_LOG(ERROR) << "Invalid context."; + return 0; + } + return GetValue(data_, kModelOptionNNRTPerformanceMode); +} + +void NNRTDeviceInfo::SetPriority(int priority) { + if (data_ == nullptr) { + MS_LOG(ERROR) << "Invalid context."; + return; + } + data_->params[kModelOptionNNRTPriority] = priority; +} + +int NNRTDeviceInfo::GetPriority() const { + if (data_ == nullptr) { + MS_LOG(ERROR) << "Invalid context."; + return 0; + } + return GetValue(data_, kModelOptionNNRTPriority); +} + +void NNRTDeviceInfo::SetEnableFP16(bool is_fp16) { + if (data_ == nullptr) { + MS_LOG(ERROR) << "Invalid context."; + return; + } + data_->params[kModelOptionNNRTEnableFP16] = is_fp16; +} + +bool NNRTDeviceInfo::GetEnableFP16() const { + if (data_ == nullptr) { + MS_LOG(ERROR) << "Invalid context."; + return false; + } + return GetValue(data_, kModelOptionNNRTEnableFP16); +} } // namespace mindspore diff --git a/mindspore/lite/src/runtime/cxx_api/converters.cc b/mindspore/lite/src/runtime/cxx_api/converters.cc index 23a02778..5f2bd40f 100644 --- a/mindspore/lite/src/runtime/cxx_api/converters.cc +++ b/mindspore/lite/src/runtime/cxx_api/converters.cc @@ -72,8 +72,13 @@ Status ContextUtils::AddAscendDevice(lite::InnerContext *inner_context, DeviceIn return kSuccess; } -Status ContextUtils::AddNNRtDevice(lite::InnerContext *inner_context) { +Status ContextUtils::AddNNRtDevice(lite::InnerContext *inner_context, size_t device_id, int performance_mode, + int priority, bool enable_fp16) { lite::DeviceInfo device_info = {0}; + device_info.nnrt_device_info_.device_id_ = device_id; + device_info.nnrt_device_info_.performance_mode_ = performance_mode; + device_info.nnrt_device_info_.priority_ = priority; + device_info.nnrt_device_info_.enable_fp16_ = enable_fp16; inner_context->device_list_.push_back({lite::DT_NNRT, device_info}); return kSuccess; } @@ -122,7 +127,10 @@ lite::InnerContext *ContextUtils::Convert(Context *context) { } else if (device->GetDeviceType() == kAscend) { ret = 
AddAscendDevice(inner_context.get(), device.get()); } else if (device->GetDeviceType() == kNNRt) { - ret = AddNNRtDevice(inner_context.get()); + auto nnrt_device_info = device->Cast(); + ret = AddNNRtDevice(inner_context.get(), nnrt_device_info->GetDeviceID(), + nnrt_device_info->GetPerformanceMode(), nnrt_device_info->GetPriority(), + nnrt_device_info->GetEnableFP16()); } if (ret != kSuccess) { MS_LOG(ERROR) << "Add device failed!"; @@ -162,7 +170,8 @@ lite::InnerContext *ContextUtils::Convert(const ContextC *context_c) { } else if (device_info_c->device_type == OH_AI_DEVICETYPE_KIRIN_NPU) { ret = AddNpuDevice(device_info_c->frequency, inner_context.get()); } else if (device_info_c->device_type == OH_AI_DEVICETYPE_NNRT) { - ret = AddNNRtDevice(inner_context.get()); + ret = AddNNRtDevice(inner_context.get(), device_info_c->device_id, device_info_c->performance_mode, + device_info_c->priority, device_info_c->enable_fp16); } if (ret != kSuccess) { MS_LOG(ERROR) << "Add device failed!"; diff --git a/mindspore/lite/src/runtime/cxx_api/converters.h b/mindspore/lite/src/runtime/cxx_api/converters.h index 11338875..bd7daabb 100644 --- a/mindspore/lite/src/runtime/cxx_api/converters.h +++ b/mindspore/lite/src/runtime/cxx_api/converters.h @@ -45,7 +45,7 @@ class ContextUtils { lite::InnerContext *inner_context); static Status AddNpuDevice(int frequency, lite::InnerContext *inner_context); static Status AddAscendDevice(lite::InnerContext *inner_context, DeviceInfoContext *device); - static Status AddNNRtDevice(lite::InnerContext *inner_context); + static Status AddNNRtDevice(lite::InnerContext *inner_context, size_t device_id, int performance_mode, int priority, bool enable_fp16); static bool IsAffinityModeValid(int affinity_mode) { return affinity_mode >= lite::NO_BIND && affinity_mode <= lite::MID_CPU; } diff --git a/mindspore/lite/src/runtime/delegate/nnrt/nnrt_delegate.cc b/mindspore/lite/src/runtime/delegate/nnrt/nnrt_delegate.cc index 67d4e6c4..8b6e5ba4 100644 --- a/mindspore/lite/src/runtime/delegate/nnrt/nnrt_delegate.cc +++ b/mindspore/lite/src/runtime/delegate/nnrt/nnrt_delegate.cc @@ -13,6 +13,9 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + +#include +#include #include "nnrt_delegate.h" #include "checker/primitive_check.h" #include "src/common/log_adapter.h" @@ -21,139 +24,432 @@ #include "nnrt_model_kernel.h" #include "schema/model_generated.h" #include "flatbuffers/flatbuffers.h" +#include "runtime/tensor_category.h" -mindspore::Status mindspore::NNRTDelegate::Build(DelegateModel *model) { +namespace mindspore { +namespace lite { +Status NNRTDelegate::Build(DelegateModel *model) { MS_LOG(DEBUG) << "Start to build NNRT model."; - if (this->nnrt_lite_graph == nullptr) { - MS_LOG(ERROR) << "nnrt_lite_graph is nullptr."; - return mindspore::kLiteError; + if ((lite_graph_ == nullptr) || (lite_graph_->sub_graphs_.size() > 1)) { + MS_LOG(WARNING) << "LiteGraph contains more than one subgraph. 
NNRT does not support control-flow model yet, fallback to CPU"; + return kSuccess; } - if (this->nnrt_lite_graph->sub_graphs_.empty()) { - // must have at lease one subgraph - MS_LOG(ERROR) << "must have at lease one subgraph"; - return mindspore::kLiteError; + + OH_NNModel *full_model = BuildFullNNModel(); + if (full_model == nullptr) { + MS_LOG(WARNING) << "Build full NNModel failed, fallback to CPU"; + return kSuccess; } - OH_NN_ReturnCode ret_code; - OH_NNModel *oh_nnmodel = OH_NNModel_Construct(); - if (oh_nnmodel == nullptr) { - MS_LOG(ERROR) << "Construct NNModel failed, oh_nnmodel is nullptr."; - return mindspore::kLiteError; + std::vector op_supports = QueryOpSupports(full_model); + if (op_supports.empty()) { + MS_LOG(WARNING) << "Query no op supports for full model, fallback to CPU"; + OH_NNModel_Destroy(&full_model); + return kSuccess; } + auto nnrt_subgraph_ranges = GetNNRTSubgraphRanges(model, op_supports); + MS_LOG(INFO) << "Found NNRT subgraph count: " << nnrt_subgraph_ranges.size(); - ret_code = OH_NNModel_BuildFromLiteGraph(oh_nnmodel, this->nnrt_lite_graph); - if (ret_code != OH_NN_SUCCESS) { - MS_LOG(ERROR) << "Build NNModel failed, OH_NN_ReturnCode = " << ret_code; - OH_NNModel_Destroy(&oh_nnmodel); - return mindspore::kLiteError; + std::vector sub_lite_graphs; + auto ret = CreateLiteGraphForNNRTSubgraph(nnrt_subgraph_ranges, &sub_lite_graphs); + if (ret != kSuccess) { + OH_NNModel_Destroy(&full_model); + MS_LOG(WARNING) << "Create NNRT sub LiteGraph failed, fallback to CPU"; + return kSuccess; } - MS_LOG(DEBUG) << "NNRTDelegate creates NNModel success."; - OH_NNCompilation *oh_nn_compilation = nullptr; - oh_nn_compilation = OH_NNCompilation_Construct(oh_nnmodel); + std::vector nnrt_subgraph_kernels; + ret = CreateNNRTSubgraphKernels(model, sub_lite_graphs, nnrt_subgraph_ranges, &nnrt_subgraph_kernels); + if (ret != kSuccess) { + OH_NNModel_Destroy(&full_model); + MS_LOG(WARNING) << "Create NNRT subgraph kernel failed, fallback to CPU"; + return kSuccess; + } + + ReplaceNNRTKernelsInDelegateModel(model, nnrt_subgraph_ranges, nnrt_subgraph_kernels); + OH_NNModel_Destroy(&full_model); + MS_LOG(INFO) << "NNRTDelegate build success."; + return kSuccess; +} + +OH_NNModel *NNRTDelegate::BuildFullNNModel() { + if (lite_graph_ == nullptr) { + MS_LOG(ERROR) << "Lite graph is null"; + return nullptr; + } - if (oh_nn_compilation == nullptr) { - MS_LOG(ERROR) << "Construct NNCompilation failed"; - OH_NNModel_Destroy(&oh_nnmodel); - return mindspore::kLiteError; + if (lite_graph_->sub_graphs_.empty()) { + MS_LOG(ERROR) << "Lite graph must have at lease one subgraph"; + return nullptr; } - MS_LOG(DEBUG) << "NNRTDelegate creates NNCompilation success."; - const size_t *allDevicesID = nullptr; - uint32_t device_count = 0; - ret_code = OH_NNDevice_GetAllDevicesID(&allDevicesID, &device_count); - if (ret_code != OH_NN_SUCCESS) { - MS_LOG(ERROR) << "NNModel GetAllDevicesID failed, OH_NN_ReturnCode = " << ret_code; - OH_NNCompilation_Destroy(&oh_nn_compilation); - OH_NNModel_Destroy(&oh_nnmodel); - return mindspore::kLiteError; + OH_NNModel *nn_model = OH_NNModel_Construct(); + if (nn_model == nullptr) { + MS_LOG(ERROR) << "Create NNModel failed, result is nullptr"; + return nullptr; } - if (device_count <= 0) { - MS_LOG(WARNING) << "No NNRt Device found, fall back to CPU. 
"; - // OH_NNCompilation_Destroy(&oh_nn_compilation); - // OH_NNModel_Destroy(&oh_nnmodel); - return mindspore::kSuccess; + auto ret = OH_NNModel_BuildFromLiteGraph(nn_model, lite_graph_); + if (ret != OH_NN_SUCCESS) { + MS_LOG(ERROR) << "Build NNModel failed, ret: " << ret; + OH_NNModel_Destroy(&nn_model); + return nullptr; } - MS_LOG(DEBUG) << "NNRTDelegate GetAllDevicesID success. device_count: " << device_count; + return nn_model; +} - // check if model ops are supported - const bool *issupported = nullptr; +std::vector NNRTDelegate::QueryOpSupports(OH_NNModel *nn_model) { + const bool *is_supported = nullptr; // Note: this memory is owned by nn_model, don't free alone. uint32_t op_count = 0; - ret_code = OH_NNModel_GetAvailableOperations(oh_nnmodel, allDevicesID[0], &issupported, &op_count); - if (ret_code != OH_NN_SUCCESS) { - MS_LOG(ERROR) << "NNModel GetAvailableOperations failed, OH_NN_ReturnCode = " << ret_code - << ", maybe due to dataParcel data length limitaion. Fall back to CPU."; - OH_NNCompilation_Destroy(&oh_nn_compilation); - OH_NNModel_Destroy(&oh_nnmodel); - return mindspore::kSuccess; + auto ret = OH_NNModel_GetAvailableOperations(nn_model, nnrt_device_info_.device_id_, &is_supported, &op_count); + if (ret != OH_NN_SUCCESS) { + MS_LOG(WARNING) << "NNModel GetAvailableOperations failed, ret: " << ret + << ", maybe caused by dataParcel data length limitation"; + return {}; } - uint32_t supported_op_count = 0; - for (uint32_t i = 0; i < op_count; i++) { - if (issupported[i]) { - supported_op_count++; + std::vector op_supports(is_supported, is_supported + op_count); + return op_supports; +} + +/* Find continuous sub-sequence in op_supports. */ +std::vector NNRTDelegate::GetNNRTSubgraphRanges(DelegateModel *model, + const std::vector &op_supports) { + std::vector nnrt_subgraph_ranges; + NNRTOpRange op_range; + bool start_count = false; + for (size_t i = 0; i < op_supports.size(); i++) { + if (op_supports[i]) { + if (start_count == false) { + start_count = true; + op_range.begin_index_ = i; + op_range.begin_iter_ = model->BeginKernelIterator() + i; + } + } else { + if (start_count == true) { + start_count = false; + op_range.end_index_ = i; + op_range.end_iter_ = model->BeginKernelIterator() + i; + nnrt_subgraph_ranges.push_back(op_range); + } } } - if (op_count != supported_op_count) { - MS_LOG(WARNING) << "this model has " << op_count << "ops, but NNRT only support " << supported_op_count - << " ops, fall back to CPU."; - // must support all op, else fall back to CPU - OH_NNCompilation_Destroy(&oh_nn_compilation); - OH_NNModel_Destroy(&oh_nnmodel); - return mindspore::kSuccess; + // handle last true subsequence + if (start_count == true) { + op_range.end_index_ = op_supports.size(); + op_range.end_iter_ = model->EndKernelIterator(); + nnrt_subgraph_ranges.push_back(op_range); + MS_LOG(INFO) << "Schedule NNRT subgraph range: [" << op_range.begin_index_ << ", " << op_range.end_index_ << ")"; } - MS_LOG(DEBUG) << "NNRtDelegate supports all op in this model."; + return nnrt_subgraph_ranges; +} - ret_code = OH_NNCompilation_SetDevice(oh_nn_compilation, allDevicesID[0]); +/** + * This method ONLY works when the follow pre-conditions are satisfied: + * 1. The node order of lite_graph_->all_nodes should be consistent with DelegateModel sequence. + * This ensures the kernel replacement in DelegateModel based on the re-organizing info from lite_graph_ is correct. + * 2. The node indices of lite_graph_->sub_graphs[0].node_indices should be monotonically increasing from 0 to size - 1. 
+ */ +Status NNRTDelegate::CreateLiteGraphForNNRTSubgraph( + const std::vector &nnrt_op_ranges, + std::vector *sub_lite_graphs) { + MS_LOG(INFO) << "Start creating LiteGraph for NNRT subgraph"; + for (const auto &op_range: nnrt_op_ranges) { + MS_LOG(INFO) << "Process op range: [" << op_range.begin_index_ << ", " << op_range.end_index_ << ")"; + LiteGraph *sub_lite_graph = new (std::nothrow)LiteGraph; + if (sub_lite_graph == nullptr) { + MS_LOG(ERROR) << "Allocate LiteGraph failed"; + return kLiteError; + } + sub_lite_graph->name_ = lite_graph_->name_; + sub_lite_graph->version_ = lite_graph_->version_; - if (ret_code != OH_NN_SUCCESS) { - MS_LOG(ERROR) << "NNCompilation SetDevice failed, OH_NN_ReturnCode = " << ret_code; - OH_NNCompilation_Destroy(&oh_nn_compilation); - OH_NNModel_Destroy(&oh_nnmodel); - return mindspore::kLiteError; + auto sub_graph = new (std::nothrow)LiteGraph::SubGraph; + if (sub_graph == nullptr) { + MS_LOG(ERROR) << "Allocate SubGraph failed"; + return kLiteError; + } + sub_graph->name_ = lite_graph_->name_; + sub_lite_graph->sub_graphs_.push_back(sub_graph); + + // deal with all_nodes + MS_LOG(INFO) << "Assemble all_nodes..."; + int new_node_index = 0; + std::map in_tensor_index_map; + std::map out_tensor_index_map; + for (size_t index = op_range.begin_index_; index < op_range.end_index_; index++) { + LiteGraph::Node *node = new (std::nothrow)LiteGraph::Node; + if (node == nullptr) { + MS_LOG(ERROR) << "Allocate Node failed"; + return kLiteError; + } + *node = *lite_graph_->all_nodes_[index]; + sub_lite_graph->all_nodes_.push_back(node); + sub_graph->node_indices_.push_back(new_node_index++); + + for (auto i: node->input_indices_) { + in_tensor_index_map.emplace(i, lite_graph_->all_tensors_[i]); + } + for (auto i: node->output_indices_) { + out_tensor_index_map.emplace(i, lite_graph_->all_tensors_[i]); + } + } + + // deal with all_tensors + MS_LOG(INFO) << "Assemble all_tensors..."; + std::set tensors; + for (auto iter: in_tensor_index_map) { + tensors.emplace(iter.second); + } + for (auto iter: out_tensor_index_map) { + tensors.emplace(iter.second); + } + + uint32_t new_index = 0; + std::map new_tensor_maps; + for (auto tensor: tensors) { + new_tensor_maps.emplace(tensor, new_index++); + } + + sub_lite_graph->all_tensors_ = std::vector(tensors.begin(), tensors.end()); + + // deal with every node's input/output indices + MS_LOG(INFO) << "Set input/output indices of each node..."; + for (auto node: sub_lite_graph->all_nodes_) { + for (auto &index : node->input_indices_) { + index = new_tensor_maps.at(in_tensor_index_map.at(index)); + } + for (auto &index : node->output_indices_) { + index = new_tensor_maps.at(out_tensor_index_map.at(index)); + } + } + + // deal with subgraph's input/output indices + MS_LOG(INFO) << "Set input/output indices of each subgraph..."; + sub_graph->tensor_indices_ = std::vector(tensors.size()); + std::iota(sub_graph->tensor_indices_.begin(), sub_graph->tensor_indices_.end(), 0U); + + for (auto iter: in_tensor_index_map) { + auto new_tensor_index = new_tensor_maps[iter.second]; + MS_LOG(DEBUG) << "handle input: old: " << iter.first << ", new: " << new_tensor_index << std::endl; + if (IsConstTensor(*iter.second)) { + MS_LOG(DEBUG) << "- tensor: " << new_tensor_index << " is const." 
<< std::endl; + continue; + } + + bool is_subgraph_input = true; + for (auto node: sub_lite_graph->all_nodes_) { + if (std::find(node->output_indices_.begin(), node->output_indices_.end(), new_tensor_index) != + node->output_indices_.end()) { + is_subgraph_input = false; + MS_LOG(DEBUG) << "- tensor: " << new_tensor_index << " is not subgraph input." << std::endl; + break; + } + } + if (is_subgraph_input) { + sub_graph->input_indices_.push_back(new_tensor_index); + MS_LOG(DEBUG) << "- select tensor: " << new_tensor_index << " as subgraph input." << std::endl; + } + } + + for (auto iter: out_tensor_index_map) { + int new_tensor_index = new_tensor_maps.at(iter.second); + MS_LOG(DEBUG) << "handle output: old: " << iter.first << ", new: " << new_tensor_index << std::endl; + if (IsConstTensor(*iter.second)) { + MS_LOG(DEBUG) << "- tensor: " << new_tensor_index << " is const." << std::endl; + continue; + } + + bool is_subgraph_output = false; + for (size_t i = 0; i < lite_graph_->all_nodes_.size(); i++) { + if ((i >= op_range.begin_index_) && (i < op_range.end_index_)) { + continue; + } + auto node = lite_graph_->all_nodes_[i]; + if (std::find(node->input_indices_.begin(), node->input_indices_.end(), iter.first) != + node->input_indices_.end()) { // As the input of node which does not belong to the subgraph. + is_subgraph_output = true; + MS_LOG(DEBUG) << "- tensor: " << new_tensor_index << " is original subgraph output. node: " << node->primitive_ << std::endl; + break; + } + } + bool is_graph_output = (std::find(lite_graph_->output_indices_.begin(),lite_graph_->output_indices_.end(), + iter.first) != lite_graph_->output_indices_.end()); + if (is_graph_output) { + MS_LOG(DEBUG) << "- tensor: " << new_tensor_index << " is graph output." << std::endl; + } + if (is_subgraph_output || is_graph_output) { + sub_graph->output_indices_.push_back(new_tensor_index); + MS_LOG(DEBUG) << "- select tensor: " << new_tensor_index << " as subgraph output." << std::endl; + } + } + + // deal with full-graph's input/output indices + sub_lite_graph->input_indices_ = sub_graph->input_indices_; + sub_lite_graph->output_indices_ = sub_graph->output_indices_; + sub_lite_graphs->push_back(sub_lite_graph); } + MS_LOG(INFO) << "Finished creating LiteGraph for NNRT subgraph"; + return kSuccess; +} - ret_code = OH_NNCompilation_Build(oh_nn_compilation); +struct TensorLocation { + uint32_t node_index; // the index of node which the tensor belongs to. + uint32_t tensor_index; // the index of node in/out tensors which the tensor is located at. 
+}; +Status NNRTDelegate::InitNNCompilation(OH_NNCompilation *nn_compilation) const { + auto ret_code = OH_NNCompilation_SetDevice(nn_compilation, nnrt_device_info_.device_id_); if (ret_code != OH_NN_SUCCESS) { - MS_LOG(ERROR) << "Build NNCompilation failed, OH_NN_ReturnCode = " << ret_code; - OH_NNCompilation_Destroy(&oh_nn_compilation); - OH_NNModel_Destroy(&oh_nnmodel); - return mindspore::kLiteError; + MS_LOG(ERROR) << "NNCompilation set device id failed, ret: " << ret_code; + return kLiteError; + } + ret_code = OH_NNCompilation_SetPerformanceMode(nn_compilation, + (OH_NN_PerformanceMode)(nnrt_device_info_.performance_mode_)); + if ((ret_code != OH_NN_SUCCESS) && (ret_code != OH_NN_OPERATION_FORBIDDEN)) { + MS_LOG(ERROR) << "NNCompilation set performance mode failed, ret: " << ret_code; + return kLiteError; + } + ret_code = OH_NNCompilation_SetPriority(nn_compilation, (OH_NN_Priority)(nnrt_device_info_.priority_)); + if ((ret_code != OH_NN_SUCCESS) && (ret_code != OH_NN_OPERATION_FORBIDDEN)) { + MS_LOG(ERROR) << "NNCompilation set priority failed, ret: " << ret_code; + return kLiteError; + } + ret_code = OH_NNCompilation_EnableFloat16(nn_compilation, nnrt_device_info_.enable_fp16_); + if ((ret_code != OH_NN_SUCCESS) && (ret_code != OH_NN_OPERATION_FORBIDDEN)) { + MS_LOG(ERROR) << "NNCompilation enable fp16 failed, ret: " << ret_code; + return kLiteError; } - MS_LOG(DEBUG) << "NNRTDelegate SetDevice success."; - - OH_NNExecutor *oh_nn_executor = nullptr; - oh_nn_executor = OH_NNExecutor_Construct(oh_nn_compilation); - if (oh_nn_executor == nullptr) { - MS_LOG(ERROR) << "Construct NNCompilation SetDevice failed, OH_NN_ReturnCode = " << ret_code; - OH_NNCompilation_Destroy(&oh_nn_compilation); - OH_NNModel_Destroy(&oh_nnmodel); - return mindspore::kLiteError; + ret_code = OH_NNCompilation_Build(nn_compilation); + if (ret_code != OH_NN_SUCCESS) { + MS_LOG(ERROR) << "Build NNCompilation failed, ret: " << ret_code; + return kLiteError; } - MS_LOG(DEBUG) << "NNRTDelegate creates NNExecutor success."; - mindspore::Status prepare_data_ret; - auto nnr_model_kernel = new (std::nothrow) NNRTModelKernel(oh_nn_executor, model->inputs(), model->outputs()); - if (nnr_model_kernel == nullptr) { - MS_LOG(ERROR) << "new NNRTModelKernel failed"; - return mindspore::kLiteError; + return kSuccess; +} + +Status NNRTDelegate::CreateNNRTSubgraphKernels(DelegateModel *model, + const std::vector &sub_lite_graphs, const std::vector &nnrt_subgraph_ranges, + std::vector *nnrt_subgraph_kernels) { + for (size_t i = 0; i < sub_lite_graphs.size(); i++) { + auto sub_lite_graph = sub_lite_graphs[i]; + + OH_NNModel *nn_model = OH_NNModel_Construct(); + auto ret = OH_NNModel_BuildFromLiteGraph(nn_model, sub_lite_graph); + if (ret != OH_NN_SUCCESS) { + MS_LOG(ERROR) << "Build NNModel failed, ret: " << ret; + OH_NNModel_Destroy(&nn_model); + return kLiteError; + } + + OH_NNCompilation *nn_compilation = OH_NNCompilation_Construct(nn_model); + if (nn_compilation == nullptr) { + MS_LOG(ERROR) << "Construct NNCompilation failed"; + OH_NNModel_Destroy(&nn_model); + return kLiteError; + } + MS_LOG(DEBUG) << "NNRTDelegate creates NNCompilation success."; + + auto ret_code = InitNNCompilation(nn_compilation); + if (ret_code != kSuccess) { + MS_LOG(ERROR) << "Init NNCompilation failed"; + OH_NNCompilation_Destroy(&nn_compilation); + OH_NNModel_Destroy(&nn_model); + return kLiteError; + } + + OH_NNExecutor *nn_executor = nullptr; + nn_executor = OH_NNExecutor_Construct(nn_compilation); + if (nn_executor == nullptr) { + MS_LOG(ERROR) 
<< "Construct NNExecutor failed, ret: " << ret_code; + OH_NNCompilation_Destroy(&nn_compilation); + OH_NNModel_Destroy(&nn_model); + return kLiteError; + } + MS_LOG(DEBUG) << "NNRTDelegate creates NNExecutor success."; + + bool format_not_support = false; + std::vector in_tensors; + for (auto index: sub_lite_graph->sub_graphs_[0]->input_indices_) { + TensorLocation location; + for (auto node_index: sub_lite_graph->sub_graphs_[0]->node_indices_) { + auto node = sub_lite_graph->all_nodes_[node_index]; + auto iter = std::find(node->input_indices_.begin(), node->input_indices_.end(), index); + if (iter != node->input_indices_.end()) { + uint32_t tensor_index = iter - node->input_indices_.begin(); + location.node_index = node_index; + location.tensor_index = tensor_index; + MS_LOG(INFO) << "Found graph input index: " << index << " is the " << tensor_index << "th input of the node " << node->primitive_; + break; + } + } + KernelIter kernel_iter = nnrt_subgraph_ranges[i].begin_iter_ + location.node_index; + in_tensors.push_back((*kernel_iter)->inputs()[location.tensor_index]); + if (in_tensors.back().format() != Format::NHWC) { + format_not_support = true; + break ; + } + } + + std::vector out_tensors; + for (auto index: sub_lite_graph->sub_graphs_[0]->output_indices_) { + TensorLocation location; + for (auto node_index: sub_lite_graph->sub_graphs_[0]->node_indices_) { + auto node = sub_lite_graph->all_nodes_[node_index]; + auto iter = std::find(node->output_indices_.begin(), node->output_indices_.end(), index); + if (iter != node->output_indices_.end()) { + uint32_t tensor_index = iter - node->output_indices_.begin(); + location.node_index = node_index; + location.tensor_index = tensor_index; + MS_LOG(INFO) << "Found graph output index: " << index << " is the " << tensor_index << "th output of the node " << node->primitive_; + break; + } + } + KernelIter kernel_iter = nnrt_subgraph_ranges[i].begin_iter_ + location.node_index; + out_tensors.push_back((*kernel_iter)->outputs()[location.tensor_index]); + if (out_tensors.back().format() != Format::NHWC) { + format_not_support = true; + break ; + } + } + if (format_not_support) { + MS_LOG(WARNING) << "Not support in/out tensor format, skip this subgraph"; + OH_NNCompilation_Destroy(&nn_compilation); + OH_NNModel_Destroy(&nn_model); + nnrt_subgraph_kernels->push_back(nullptr); + continue ; + } + + auto nnrt_model_kernel = new (std::nothrow)NNRTModelKernel(nn_executor, in_tensors, out_tensors); + if (nnrt_model_kernel == nullptr) { + MS_LOG(ERROR) << "new NNRTModelKernel failed"; + return kLiteError; + } + OH_NNCompilation_Destroy(&nn_compilation); + OH_NNModel_Destroy(&nn_model); + nnrt_subgraph_kernels->push_back(nnrt_model_kernel); } - OH_NNCompilation_Destroy(&oh_nn_compilation); - OH_NNModel_Destroy(&oh_nnmodel); - KernelIter from = model->BeginKernelIterator(); - KernelIter end = model->EndKernelIterator(); - model->Replace(from, end, nnr_model_kernel); - - MS_LOG(DEBUG) << "NNRTDelegate build success."; - return mindspore::kSuccess; + return kSuccess; } -mindspore::Status mindspore::NNRTDelegate::Init() { - MS_LOG(DEBUG) << "NNRTDelegate init success."; - return mindspore::kSuccess; +void NNRTDelegate::ReplaceNNRTKernelsInDelegateModel(DelegateModel *model, + const std::vector &nnrt_subgraph_ranges, + const std::vector &nnrt_subgraph_kernels) { + // Here we perform the replacement from back to front intentionally! If replace from front to end, the kernel + // sequence would shrink and the later begin_iter_/end_iter_ may be erased already. 
+ for (int i = nnrt_subgraph_ranges.size() - 1; i >= 0; i--) { + if (nnrt_subgraph_kernels[i] == nullptr) { + continue; + } + auto from = nnrt_subgraph_ranges[i].begin_iter_; + auto end = nnrt_subgraph_ranges[i].end_iter_; + (void)model->Replace(from, end, nnrt_subgraph_kernels[i]); + MS_LOG(INFO) << "Replace nnrt subgraph kernel in range: [" << (from - model->BeginKernelIterator()) + << ", " << (end - model->BeginKernelIterator()) << ")"; + } } -mindspore::Status mindspore::NNRTDelegate::PrepareInputs(DelegateModel *model, - OH_NNExecutor *oh_nn_executor) { + +Status NNRTDelegate::PrepareInputs(DelegateModel *model, + OH_NNExecutor *oh_nn_executor) { auto input_tensors = model->inputs(); for (size_t i = 0; i < input_tensors.size(); i++) { auto tensor = input_tensors[i]; @@ -164,10 +460,10 @@ mindspore::Status mindspore::NNRTDelegate::PrepareInputs(DelegateModel scale; std::vector zero_point; if (!tmp_quant_param.empty()) { - quant_param = new (std::nothrow) OH_NN_QuantParam; + quant_param = new(std::nothrow) OH_NN_QuantParam; if (quant_param == nullptr) { MS_LOG(ERROR) << "new OH_NN_QuantParam failed."; - return mindspore::kLiteError; + return kLiteError; } for (auto qparam : tmp_quant_param) { bit_num.emplace_back(qparam.bit_num); @@ -179,12 +475,12 @@ mindspore::Status mindspore::NNRTDelegate::PrepareInputs(DelegateModelscale = scale.data(); quant_param->zeroPoint = zero_point.data(); } - auto oprend = new (std::nothrow) OH_NN_Tensor; + auto oprend = new(std::nothrow) OH_NN_Tensor; if (oprend == nullptr) { MS_LOG(ERROR) << "new OH_NN_Tensor Failed"; - return mindspore::kLiteError; + return kLiteError; } - oprend->dataType = ConvertDataType(tensor.DataType()); + oprend->dataType = CastToNNRTDataType(tensor.DataType()); oprend->dimensionCount = tensor_shape.size(); std::vector dimensions_list; @@ -194,14 +490,14 @@ mindspore::Status mindspore::NNRTDelegate::PrepareInputs(DelegateModel subgraph_list; for (auto subgraph : lite_graph.sub_graphs_) { - auto new_subgraph = new (std::nothrow) LiteGraph::SubGraph; + auto new_subgraph = new(std::nothrow) LiteGraph::SubGraph; if (new_subgraph == nullptr) { MS_LOG(ERROR) << "new LiteGraph::Subgraph failed."; return; @@ -334,30 +597,32 @@ void mindspore::NNRTDelegate::ShallowCopyLiteGraph(const mindspore::lite::LiteGr } for (auto tensor : lite_graph.all_tensors_) { ret = lite::CheckTensorSupported(static_cast(tensor)); - if (ret == mindspore::kLiteError) { + if (ret == kLiteError) { MS_LOG(ERROR) << "tensor supported check failed."; return; } } - nnrt_lite_graph = new (std::nothrow) lite::LiteGraph(); - if (nnrt_lite_graph == nullptr) { + lite_graph_ = new(std::nothrow) lite::LiteGraph(); + if (lite_graph_ == nullptr) { MS_LOG(ERROR) << "new LiteGraph failed."; return; } - nnrt_lite_graph->name_ = lite_graph.name_; - nnrt_lite_graph->version_ = lite_graph.version_; - nnrt_lite_graph->input_indices_ = lite_graph.input_indices_; - nnrt_lite_graph->output_indices_ = lite_graph.output_indices_; - nnrt_lite_graph->all_tensors_ = lite_graph.all_tensors_; - nnrt_lite_graph->all_nodes_ = node_list; - nnrt_lite_graph->sub_graphs_ = subgraph_list; + lite_graph_->name_ = lite_graph.name_; + lite_graph_->version_ = lite_graph.version_; + lite_graph_->input_indices_ = lite_graph.input_indices_; + lite_graph_->output_indices_ = lite_graph.output_indices_; + lite_graph_->all_tensors_ = lite_graph.all_tensors_; + lite_graph_->all_nodes_ = node_list; + lite_graph_->sub_graphs_ = subgraph_list; MS_LOG(INFO) << "ShallowCopyLiteGraph success."; } 
-mindspore::NNRTDelegate::~NNRTDelegate() { - if (this->nnrt_lite_graph != nullptr) { +NNRTDelegate::~NNRTDelegate() { + if (lite_graph_ != nullptr) { MS_LOG(ERROR) << "Delete NNRTDelegate."; } -}; +} +} // namespace lite +} // namespace mindspore diff --git a/mindspore/lite/src/runtime/delegate/nnrt/nnrt_delegate.h b/mindspore/lite/src/runtime/delegate/nnrt/nnrt_delegate.h index 1be08119..48adc388 100644 --- a/mindspore/lite/src/runtime/delegate/nnrt/nnrt_delegate.h +++ b/mindspore/lite/src/runtime/delegate/nnrt/nnrt_delegate.h @@ -15,38 +15,64 @@ */ #ifndef MINDSPORE_NNR_DELEGATE_H #define MINDSPORE_NNR_DELEGATE_H + #include #include #include "include/api/delegate.h" #include "include/context.h" #include "include/model.h" +#include "nnrt_model_kernel.h" +#include "schema/model_generated.h" #include "interfaces/kits/c/neural_network_runtime_type.h" -namespace mindspore { +#include "interfaces/kits/c/neural_network_runtime.h" +#include "interfaces/innerkits/c/neural_network_runtime_inner.h" -using namespace lite; +namespace mindspore { +namespace lite { +struct NNRTOpRange { + /* NNRT kernel range in DelegateModel: [begin_iter_, end_iter_) */ + KernelIter begin_iter_; + KernelIter end_iter_; + /* NNRT node range in lite_graph_: [begin_index_, end_index_) */ + size_t begin_index_; + size_t end_index_; +}; class NNRTDelegate : public Delegate { public: - NNRTDelegate() : Delegate(){}; - + NNRTDelegate() = default; + NNRTDelegate(const NNRtDeviceInfo &nnrt_device_info) : nnrt_device_info_(nnrt_device_info) {} ~NNRTDelegate() override; - - Status Init() override; - + Status Init() override { return kSuccess; } Status Build(DelegateModel *model) override; - void ShallowCopyLiteGraph(const lite::LiteGraph &liteGraph); - - protected: - LiteGraph *nnrt_lite_graph = nullptr; + static std::vector GetNNRTSubgraphRanges(DelegateModel *model, + const std::vector &op_supports); private: - // static LiteGraph* CreateLiteGraph(const LiteGraph &liteGraph); + OH_NNModel *BuildFullNNModel(); + std::vector QueryOpSupports(OH_NNModel *nn_model); + + Status CreateLiteGraphForNNRTSubgraph( + const std::vector &nnrt_op_ranges, + std::vector *sub_lite_graphs); + Status CreateNNRTSubgraphKernels( + DelegateModel *model, + const std::vector &sub_lite_graphs, + const std::vector &nnrt_subgraph_ranges, + std::vector *nnrt_subgraph_kernels); + void ReplaceNNRTKernelsInDelegateModel(DelegateModel *model, + const std::vector &nnrt_subgraph_ranges, + const std::vector &nnrt_subgraph_kernels); Status PrepareInputs(DelegateModel *model, OH_NNExecutor *oh_nn_executor); Status PrepareOutputs(DelegateModel *model, OH_NNExecutor *oh_nn_executor); - OH_NN_DataType ConvertDataType(mindspore::DataType data_type); -}; + Status InitNNCompilation(OH_NNCompilation *nn_compilation) const; + static OH_NN_DataType CastToNNRTDataType(mindspore::DataType data_type); + NNRtDeviceInfo nnrt_device_info_; + LiteGraph *lite_graph_ = nullptr; +}; +} // namespace lite } // namespace mindspore #endif // MINDSPORE_NNR_DELEGATE_H diff --git a/mindspore/lite/src/runtime/delegate/nnrt/nnrt_stub.cc b/mindspore/lite/src/runtime/delegate/nnrt/nnrt_stub.cc index 886ac304..f2bee949 100644 --- a/mindspore/lite/src/runtime/delegate/nnrt/nnrt_stub.cc +++ b/mindspore/lite/src/runtime/delegate/nnrt/nnrt_stub.cc @@ -75,4 +75,25 @@ OH_NN_ReturnCode OH_NNModel_GetAvailableOperations(OH_NNModel *model, OH_NN_ReturnCode OH_NNModel_BuildFromLiteGraph(OH_NNModel *model, const void *liteGraph) { return OH_NN_SUCCESS; +} + +OH_NN_ReturnCode OH_NNDevice_GetName(size_t 
deviceID, const char **name) { + return OH_NN_SUCCESS; +} + +OH_NN_ReturnCode OH_NNDevice_GetType(size_t deviceID, OH_NN_DeviceType *deviceType) { + return OH_NN_SUCCESS; +} + +OH_NN_ReturnCode OH_NNCompilation_SetPriority(OH_NNCompilation *compilation, OH_NN_Priority priority) { + return OH_NN_SUCCESS; +} + +OH_NN_ReturnCode OH_NNCompilation_EnableFloat16(OH_NNCompilation *compilation, bool enableFloat16) { + return OH_NN_SUCCESS; +} + +OH_NN_ReturnCode OH_NNCompilation_SetPerformanceMode(OH_NNCompilation *compilation, + OH_NN_PerformanceMode performanceMode) { + return OH_NN_SUCCESS; } \ No newline at end of file diff --git a/mindspore/lite/src/runtime/lite_model.cc b/mindspore/lite/src/runtime/lite_model.cc index 124b4728..cd8e68d1 100644 --- a/mindspore/lite/src/runtime/lite_model.cc +++ b/mindspore/lite/src/runtime/lite_model.cc @@ -91,6 +91,8 @@ int LiteModel::ConvertSubGraph(const schema::SubGraph &sub_graph) { if (sub_graph.name() == nullptr || sub_graph.inputIndices() == nullptr || sub_graph.outputIndices() == nullptr || sub_graph.tensorIndices() == nullptr) { MS_LOG(ERROR) << "sub_graph is invalid"; + MS_LOG(ERROR) << "sub_graph.name() = " << sub_graph.name() << ", sub_graph.inputIndices() = " << sub_graph.inputIndices() + << ", sub_graph.outputIndices() = " << sub_graph.outputIndices() << ", sub_graph.tensorIndices() = " << sub_graph.tensorIndices(); return RET_ERROR; } @@ -530,6 +532,33 @@ Model *ImportFromBuffer(const char *model_buf, size_t size, bool take_buf, minds return model; } +std::string LiteGraph::ToString() const { + std::stringstream ss; + ss << "all_nodes: " << all_nodes_.size() << std::endl; + for (size_t i = 0; i < all_nodes_.size(); i++) { + ss << "- node " << i << ": " << all_nodes_[i]->primitive_ << std::endl; + ss << "- node " << i << " input_indices_: " << all_nodes_[i]->input_indices_ << std::endl; + ss << "- node " << i << " output_indices_: " << all_nodes_[i]->output_indices_ << std::endl; + } + ss << "all_tensors: " << all_tensors_.size() << std::endl; + for (size_t i = 0; i < all_tensors_.size(); i++) { + ss << "- tensor " << i << ": " << all_tensors_[i] << std::endl; + } + ss << "input_indices: " << input_indices_<< std::endl; + ss << "output_indices: " << output_indices_ << std::endl; + + ss << "subgraphs: " << std::endl; + int count = 0; + for (auto subgraph: sub_graphs_) { + ss << "- subgraph " << count++ << std::endl; + ss << "--- subgraph input " << subgraph->input_indices_ << std::endl; + ss << "--- subgraph output " << subgraph->output_indices_ << std::endl; + ss << "--- subgraph node " << subgraph->node_indices_ << std::endl; + ss << "--- subgraph tensor " << subgraph->tensor_indices_ << std::endl; + } + return ss.str(); +} + Model *Model::Import(const char *model_buf, size_t size) { return ImportFromBuffer(model_buf, size, false); } Model *Model::Import(const char *filename) { return ImportFromPath(filename); } diff --git a/mindspore/lite/src/runtime/lite_session.cc b/mindspore/lite/src/runtime/lite_session.cc index eb83f444..b8808e21 100644 --- a/mindspore/lite/src/runtime/lite_session.cc +++ b/mindspore/lite/src/runtime/lite_session.cc @@ -834,7 +834,14 @@ int LiteSession::CreateNPUDelegate() { int LiteSession::CreateNNRTDelegate() { #if SUPPORT_NNRT - delegate_ = std::make_shared(); + auto iter = std::find_if(context_->device_list_.begin(), context_->device_list_.end(), + [](DeviceContext &device) { return device.device_type_ == lite::DT_NNRT; }); + if(iter == context_->device_list_.end()) { + MS_LOG(ERROR) << "Found non NNRT device info"; 
+    return RET_ERROR;
+  }
+
+  delegate_ = std::make_shared<NNRTDelegate>(iter->device_info_.nnrt_device_info_);
   if (delegate_ == nullptr) {
     MS_LOG(ERROR) << "New NNRT delegate failed";
     return RET_ERROR;
diff --git a/mindspore/lite/src/runtime/tensor_category.cc b/mindspore/lite/src/runtime/tensor_category.cc
index 07d14de0..9a755d81 100644
--- a/mindspore/lite/src/runtime/tensor_category.cc
+++ b/mindspore/lite/src/runtime/tensor_category.cc
@@ -30,5 +30,9 @@ Category TensorCategory(const schema::Tensor &tensor) {
   auto data_size = tensor.data() == nullptr ? 0 : tensor.data()->size();
   return TensorCategory(tensor.nodeType(), shape_num, TypeId(tensor.dataType()), data_size);
 }
+
+bool IsConstTensor(const schema::Tensor &tensor) {
+  return TensorCategory(tensor) != Category::VAR;
+}
 }  // namespace lite
 }  // namespace mindspore
diff --git a/mindspore/lite/src/runtime/tensor_category.h b/mindspore/lite/src/runtime/tensor_category.h
index 34d6cc82..8dadc344 100644
--- a/mindspore/lite/src/runtime/tensor_category.h
+++ b/mindspore/lite/src/runtime/tensor_category.h
@@ -34,6 +34,7 @@ enum Category {
 
 Category TensorCategory(const int node_type, const size_t shape_num, const TypeId data_type, const size_t data_size);
 Category TensorCategory(const schema::Tensor &tensor);
+bool IsConstTensor(const schema::Tensor &tensor);
 }  // namespace lite
 }  // namespace mindspore
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_TENSOR_CATEGORY_H_
diff --git a/mindspore/lite/test/CMakeLists.txt b/mindspore/lite/test/CMakeLists.txt
index 5fa7bea0..c7c4a3cb 100644
--- a/mindspore/lite/test/CMakeLists.txt
+++ b/mindspore/lite/test/CMakeLists.txt
@@ -28,9 +28,13 @@ file(GLOB_RECURSE TEST_UT_SRC
     ${TEST_DIR}/ut/src/runtime/kernel/arm/common/*.cc
     ${TEST_DIR}/ut/src/runtime/kernel/arm/fp32/*.cc
     ${TEST_DIR}/ut/src/runtime/kernel/arm/string/*.cc
-    ${TEST_DIR}/ut/src/api/context_c_test.cc
-    ${TEST_DIR}/ut/src/api/tensor_c_test.cc
+#    ${TEST_DIR}/ut/src/api/context_c_test.cc
+#    ${TEST_DIR}/ut/src/api/tensor_c_test.cc
     )
+if(MSLITE_ENABLE_NNRT)
+    list(APPEND TEST_UT_SRC ${TEST_DIR}/ut/src/runtime/nnrt_delegate/nnrt_delegate_tests.cc)
+endif()
+
 if(MSLITE_ENABLE_SERVER_INFERENCE)
     list(APPEND TEST_UT_SRC ${TEST_DIR}/ut/src/api/model_parallel_runner_test.cc)
 endif()
@@ -85,7 +89,7 @@ endif()
 
 if(MSLITE_ENABLE_INT8)
     file(GLOB_RECURSE TEST_INT8_UT_SRC
-        ${TEST_DIR}/ut/src/runtime/kernel/arm/int8/*.cc
+#        ${TEST_DIR}/ut/src/runtime/kernel/arm/int8/*.cc
         ${TEST_DIR}/ut/nnacl/int8/*.cc
         )
     list(APPEND TEST_UT_SRC ${TEST_INT8_UT_SRC})
diff --git a/mindspore/lite/test/ut/src/runtime/nnrt_delegate/nnrt_delegate_tests.cc b/mindspore/lite/test/ut/src/runtime/nnrt_delegate/nnrt_delegate_tests.cc
new file mode 100644
index 00000000..e1ea3968
--- /dev/null
+++ b/mindspore/lite/test/ut/src/runtime/nnrt_delegate/nnrt_delegate_tests.cc
@@ -0,0 +1,59 @@
+/**
+ * Copyright 2023 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <memory>
+#include "gtest/gtest.h"
+#include "runtime/delegate/nnrt/nnrt_delegate.h"
+
+using namespace mindspore;
+using namespace mindspore::lite;
+
+void AssertOpRange(const std::vector<NNRTOpRange> &op_ranges, std::vector<std::vector<size_t>> expect) {
+  ASSERT_EQ(op_ranges.size(), expect.size());
+  for (size_t i = 0; i < op_ranges.size(); i++) {
+    ASSERT_EQ(op_ranges[i].begin_index_, expect[i][0]);
+    ASSERT_EQ(op_ranges[i].end_index_, expect[i][1]);
+  }
+}
+
+TEST(NNRTDelegateTest, GetNNRTSubgraphRanges) {
+  // Prepare DelegateModel
+  std::vector<kernel::Kernel *> kernels(5, nullptr);
+  std::vector<mindspore::MSTensor> inputs = {};
+  std::vector<mindspore::MSTensor> outputs = {};
+  std::unique_ptr<DelegateModel<schema::Primitive>> model;
+  model.reset(new DelegateModel<schema::Primitive>(&kernels, inputs, outputs, {}, SCHEMA_CUR));
+
+  std::cout << "Test case 1, expect: {[0, 1), [3, 5)}" << std::endl;
+  auto op_ranges01 = NNRTDelegate::GetNNRTSubgraphRanges(model.get(), {true, false, false, true, true});
+  AssertOpRange(op_ranges01, {{0, 1}, {3, 5}});
+
+  std::cout << "Test case 2, expect: {}" << std::endl;
+  auto op_ranges02 = NNRTDelegate::GetNNRTSubgraphRanges(model.get(), {false, false, false, false, false});
+  AssertOpRange(op_ranges02, {});
+
+  std::cout << "Test case 3, expect: {[0, 5)}" << std::endl;
+  auto op_ranges03 = NNRTDelegate::GetNNRTSubgraphRanges(model.get(), {true, true, true, true, true});
+  AssertOpRange(op_ranges03, {{0, 5}});
+
+  std::cout << "Test case 4, expect: {[0, 1), [2, 3), [4, 5)}" << std::endl;
+  auto op_ranges04 = NNRTDelegate::GetNNRTSubgraphRanges(model.get(), {true, false, true, false, true});
+  AssertOpRange(op_ranges04, {{0, 1}, {2, 3}, {4, 5}});
+
+  std::cout << "Test case 5, expect: {[1, 2), [3, 4)}" << std::endl;
+  auto op_ranges05 = NNRTDelegate::GetNNRTSubgraphRanges(model.get(), {false, true, false, true, false});
+  AssertOpRange(op_ranges05, {{1, 2}, {3, 4}});
+}
\ No newline at end of file
-- 
2.34.1