From baf2daaebd70448cddd35f5011642fe585d071b5 Mon Sep 17 00:00:00 2001 From: chengfeng27 Date: Tue, 5 Mar 2024 20:00:24 +0800 Subject: [PATCH] hilog use macro definition api --- cmake/external_libs/flatbuffers.cmake | 4 +- include/api/context.h | 65 ++ include/c_api/context_c.h | 111 +++ include/c_api/model_c.h | 178 ++++ include/c_api/tensor_c.h | 14 + include/c_api/types_c.h | 57 +- include/sdk_api/context.h | 103 +++ include/sdk_api/tensor.h | 13 + include/sdk_api/types.h | 38 +- .../plugin/device/cpu/kernel/nnacl/BUILD.gn | 3 + .../device/cpu/kernel/nnacl/CMakeLists.txt | 2 +- .../kernel/nnacl/avx/scatter_nd_binary_avx.h | 66 ++ .../nnacl/avx512/scatter_nd_binary_avx512.h | 66 ++ .../cpu/kernel/nnacl/base/scatter_nd_binary.c | 28 + .../cpu/kernel/nnacl/base/scatter_nd_binary.h | 3 + .../nnacl/base/scatter_nd_binary_simd.h.in | 14 + .../kernel/nnacl/custom_is_inf_parameter.h | 26 + .../nnacl/custom_masked_fill_parameter.h | 26 + .../custom_tensor_scatter_max_parameter.h | 26 + .../kernel/nnacl/infer/custom_is_inf_infer.c | 38 + .../kernel/nnacl/infer/custom_is_inf_infer.h | 31 + .../nnacl/infer/custom_masked_fill_infer.c | 37 + .../nnacl/infer/custom_masked_fill_infer.h | 31 + .../infer/custom_tensor_scatter_max_infer.c | 37 + .../infer/custom_tensor_scatter_max_infer.h | 31 + .../nnacl/neon/scatter_nd_binary_neon.h | 65 ++ .../plugin/device/cpu/kernel/nnacl/op_base.h | 4 + .../cpu/kernel/nnacl/scatter_nd_binary_simd.h | 36 + .../kernel/nnacl/sse/scatter_nd_binary_sse.h | 66 ++ mindspore/core/mindrt/BUILD.gn | 9 +- .../mindrt/src/thread/actor_threadpool.cc | 2 +- .../core/mindrt/src/thread/core_affinity.cc | 6 +- .../core/mindrt/src/thread/core_affinity.h | 2 +- .../mindrt/src/thread/parallel_threadpool.cc | 2 +- mindspore/core/mindrt/src/thread/threadlog.h | 28 +- .../core/mindrt/src/thread/threadpool.cc | 7 +- mindspore/lite/BUILD.gn | 82 +- mindspore/lite/CMakeLists.txt | 5 +- mindspore/lite/include/lite_types.h | 1 + mindspore/lite/include/model.h | 4 + .../lite/include/registry/converter_context.h | 4 +- mindspore/lite/mindir/include/mindir.h | 2 + mindspore/lite/mindir/src/mindir.cc | 40 + mindspore/lite/mindir/src/mindir_tensor.cc | 2 +- mindspore/lite/mindir/src/utils.cc | 2 +- mindspore/lite/src/CMakeLists.txt | 6 +- mindspore/lite/src/common/context_util.cc | 14 +- mindspore/lite/src/common/log.cc | 33 +- mindspore/lite/src/common/log.h | 50 +- .../common/ops/populate/custom_populate.cc | 53 ++ mindspore/lite/src/litert/c_api/context_c.cc | 372 +++++++- mindspore/lite/src/litert/c_api/context_c.h | 23 - mindspore/lite/src/litert/c_api/model_c.cc | 724 ++++++++------- mindspore/lite/src/litert/c_api/tensor_c.cc | 78 +- .../lite/src/litert/c_api/type_c_private.h | 40 + mindspore/lite/src/litert/cxx_api/context.cc | 85 ++ .../lite/src/litert/cxx_api/converters.cc | 60 +- .../lite/src/litert/cxx_api/converters.h | 4 +- .../src/litert/delegate/nnrt/CMakeLists.txt | 27 +- .../delegate/nnrt/checker/primitive_check.cc | 2 + .../src/litert/delegate/nnrt/nnrt_delegate.cc | 836 ++++++++++++++---- .../src/litert/delegate/nnrt/nnrt_delegate.h | 74 +- .../litert/delegate/nnrt/nnrt_model_kernel.cc | 3 +- .../litert/delegate/nnrt/nnrt_model_kernel.h | 2 +- .../src/litert/delegate/nnrt/nnrt_stub.cc | 99 +++ mindspore/lite/src/litert/infer_manager.cc | 3 +- mindspore/lite/src/litert/inner_context.cc | 4 + mindspore/lite/src/litert/inner_context.h | 14 + mindspore/lite/src/litert/kernel/cpu/BUILD.gn | 51 +- .../src/litert/kernel/cpu/base/custom_base.cc | 46 + 
.../src/litert/kernel/cpu/base/custom_base.h | 43 + .../litert/kernel/cpu/base/custom_is_inf.cc | 61 ++ .../litert/kernel/cpu/base/custom_is_inf.h | 38 + .../kernel/cpu/base/custom_masked_fill.cc | 84 ++ .../kernel/cpu/base/custom_masked_fill.h | 35 + .../kernel/cpu/base/custom_tensor_scatter.cc | 75 ++ .../kernel/cpu/base/custom_tensor_scatter.h | 36 + mindspore/lite/src/litert/lite_model.cc | 29 + mindspore/lite/src/litert/lite_session.cc | 39 +- mindspore/lite/src/litert/lite_session.h | 1 + mindspore/lite/src/litert/scheduler.cc | 17 + mindspore/lite/src/litert/tensor_category.cc | 4 + mindspore/lite/src/litert/tensor_category.h | 1 + mindspore/lite/test/CMakeLists.txt | 15 +- mindspore/lite/test/runtest.sh | 1 + .../test/ut/test_data/third_party_model.cfg | 8 + .../tools/converter/api/converter_api_test.cc | 10 + .../third_party_param_parser_test.cc | 176 ++++ .../lite/tools/benchmark/benchmark_base.cc | 2 +- .../lite/tools/benchmark/benchmark_base.h | 2 +- .../lite/tools/benchmark/benchmark_c_api.cc | 4 + .../tools/benchmark/benchmark_unified_api.cc | 5 + .../lite/tools/benchmark_train/CMakeLists.txt | 3 + mindspore/lite/tools/benchmark_train/main.cc | 3 +- .../lite/tools/benchmark_train/net_runner.cc | 10 +- .../lite/tools/benchmark_train/net_train.cc | 418 +-------- .../lite/tools/benchmark_train/net_train.h | 229 +---- .../tools/benchmark_train/net_train_base.cc | 410 +++++++++ .../tools/benchmark_train/net_train_base.h | 288 ++++++ .../tools/benchmark_train/net_train_c_api.cc | 659 ++++++++++++++ .../tools/benchmark_train/net_train_c_api.h | 121 +++ .../tools/benchmark_train/run_net_train.cc | 86 ++ .../tools/benchmark_train/run_net_train.h | 22 + mindspore/lite/tools/converter/CMakeLists.txt | 4 + .../config_parser/config_file_parser.cc | 27 + .../config_parser/config_file_parser.h | 15 + .../config_parser/third_party_param_parser.cc | 299 +++++++ .../config_parser/third_party_param_parser.h | 44 + mindspore/lite/tools/converter/converter.cc | 34 +- .../tools/converter/converter_funcgraph.cc | 13 +- .../converter_lite/converter_flags.cc | 4 +- .../tools/converter/cxx_api/converter_para.h | 14 + .../tools/converter/graphdef_transform.cc | 44 + .../parser/third_party/CMakeLists.txt | 4 + .../third_party/third_party_model_parser.cc | 277 ++++++ .../third_party/third_party_model_parser.h | 50 ++ .../registry/model_parser_registry.cc | 4 +- 117 files changed, 6456 insertions(+), 1432 deletions(-) create mode 100644 mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/avx/scatter_nd_binary_avx.h create mode 100644 mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/avx512/scatter_nd_binary_avx512.h create mode 100644 mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/custom_is_inf_parameter.h create mode 100644 mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/custom_masked_fill_parameter.h create mode 100644 mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/custom_tensor_scatter_max_parameter.h create mode 100644 mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/custom_is_inf_infer.c create mode 100644 mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/custom_is_inf_infer.h create mode 100644 mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/custom_masked_fill_infer.c create mode 100644 mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/custom_masked_fill_infer.h create mode 100644 mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/custom_tensor_scatter_max_infer.c create mode 100644 mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/custom_tensor_scatter_max_infer.h create mode 100644 
mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/neon/scatter_nd_binary_neon.h create mode 100644 mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/scatter_nd_binary_simd.h create mode 100644 mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/sse/scatter_nd_binary_sse.h create mode 100644 mindspore/lite/src/litert/c_api/type_c_private.h create mode 100644 mindspore/lite/src/litert/delegate/nnrt/nnrt_stub.cc create mode 100644 mindspore/lite/src/litert/kernel/cpu/base/custom_base.cc create mode 100644 mindspore/lite/src/litert/kernel/cpu/base/custom_base.h create mode 100644 mindspore/lite/src/litert/kernel/cpu/base/custom_is_inf.cc create mode 100644 mindspore/lite/src/litert/kernel/cpu/base/custom_is_inf.h create mode 100644 mindspore/lite/src/litert/kernel/cpu/base/custom_masked_fill.cc create mode 100644 mindspore/lite/src/litert/kernel/cpu/base/custom_masked_fill.h create mode 100644 mindspore/lite/src/litert/kernel/cpu/base/custom_tensor_scatter.cc create mode 100644 mindspore/lite/src/litert/kernel/cpu/base/custom_tensor_scatter.h create mode 100644 mindspore/lite/test/ut/test_data/third_party_model.cfg create mode 100644 mindspore/lite/test/ut/tools/converter/config_parser/third_party_param_parser_test.cc create mode 100644 mindspore/lite/tools/benchmark_train/net_train_base.cc create mode 100644 mindspore/lite/tools/benchmark_train/net_train_base.h create mode 100644 mindspore/lite/tools/benchmark_train/net_train_c_api.cc create mode 100644 mindspore/lite/tools/benchmark_train/net_train_c_api.h create mode 100644 mindspore/lite/tools/benchmark_train/run_net_train.cc create mode 100644 mindspore/lite/tools/benchmark_train/run_net_train.h create mode 100644 mindspore/lite/tools/converter/config_parser/third_party_param_parser.cc create mode 100644 mindspore/lite/tools/converter/config_parser/third_party_param_parser.h create mode 100644 mindspore/lite/tools/converter/parser/third_party/CMakeLists.txt create mode 100644 mindspore/lite/tools/converter/parser/third_party/third_party_model_parser.cc create mode 100644 mindspore/lite/tools/converter/parser/third_party/third_party_model_parser.h diff --git a/cmake/external_libs/flatbuffers.cmake b/cmake/external_libs/flatbuffers.cmake index 2fde4311..87f0425b 100644 --- a/cmake/external_libs/flatbuffers.cmake +++ b/cmake/external_libs/flatbuffers.cmake @@ -21,8 +21,8 @@ else() # flatbuffers.lib cimplied by msvc set(CMAKE_STATIC_LIBRARY_PREFIX "") else() - set(flatbuffers_CXXFLAGS "-fPIC -fPIE -D_FORTIFY_SOURCE=2 -O2 -fstack-protector-strong") - set(flatbuffers_CFLAGS "-fPIC -fPIE -D_FORTIFY_SOURCE=2 -O2 -fstack-protector-strong") + set(flatbuffers_CXXFLAGS "-fPIC -fPIE -D_FORTIFY_SOURCE=2 -O2 -fstack-protector-strong -Wno-error=unused-but-set-variable") + set(flatbuffers_CFLAGS "-fPIC -fPIE -D_FORTIFY_SOURCE=2 -O2 -fstack-protector-strong -Wno-error=unused-but-set-variable") endif() if(WIN32) diff --git a/include/api/context.h b/include/api/context.h index c9fb11f0..eb704d44 100644 --- a/include/api/context.h +++ b/include/api/context.h @@ -39,6 +39,8 @@ enum DeviceType { kAscend310, kCustomDevice, kAllDevice, + //ohos-only device range[60,80) + kNNRt = 60, // add new type here kInvalidDeviceType = 100, }; @@ -598,5 +600,68 @@ void AscendDeviceInfo::SetBufferOptimizeMode(const std::string &buffer_optimize_ SetBufferOptimizeMode(StringToChar(buffer_optimize_mode)); } std::string AscendDeviceInfo::GetBufferOptimizeMode() const { return CharToString(GetBufferOptimizeModeChar()); } + +struct Extension { + std::string name; + std::vector value; +}; + 
+class MS_API NNRTDeviceInfo : public DeviceInfoContext {
+ public:
+  /// \brief Get the type of this DeviceInfoContext.
+  ///
+  /// \return Type of this DeviceInfoContext.
+  enum DeviceType GetDeviceType() const override { return DeviceType::kNNRt; };
+
+  /// \brief Set device id.
+  ///
+  /// \param[in] device_id The device id.
+  void SetDeviceID(size_t device_id);
+
+  /// \brief Get the device id.
+  ///
+  /// \return The device id.
+  size_t GetDeviceID() const;
+
+  /// \brief Set performance mode.
+  ///
+  /// \param[in] performance_mode The performance mode.
+  void SetPerformanceMode(int performance_mode);
+
+  /// \brief Get performance mode.
+  ///
+  /// \return The performance mode.
+  int GetPerformanceMode() const;
+
+  /// \brief Set priority.
+  ///
+  /// \param[in] priority The priority.
+  void SetPriority(int priority);
+
+  /// \brief Get priority.
+  ///
+  /// \return The priority.
+  int GetPriority() const;
+
+  /// \brief Set whether to enable float16 inference.
+  ///
+  /// \param[in] is_fp16 Enable float16 inference or not.
+  void SetEnableFP16(bool is_fp16);
+
+  /// \brief Get whether float16 inference is enabled.
+  ///
+  /// \return Whether float16 inference is enabled.
+  bool GetEnableFP16() const;
+
+  /// \brief Set extensions.
+  ///
+  /// \param[in] extensions Extension array.
+  void SetExtensions(const std::vector<Extension> &extensions);
+
+  /// \brief Get extensions.
+  ///
+  /// \return Extension array.
+  std::vector<Extension> GetExtensions() const;
+};
 }  // namespace mindspore
 #endif  // MINDSPORE_INCLUDE_API_CONTEXT_H
diff --git a/include/c_api/context_c.h b/include/c_api/context_c.h
index 53839e80..8951da25 100644
--- a/include/c_api/context_c.h
+++ b/include/c_api/context_c.h
@@ -19,6 +19,7 @@
 #include
 #include
 #include
+#include "include/c_api/status_c.h"
 #include "include/c_api/types_c.h"

 #ifdef __cplusplus
@@ -173,6 +174,109 @@ OH_AI_API void OH_AI_DeviceInfoSetFrequency(OH_AI_DeviceInfoHandle device_info,
 /// \return NPU frequency
 OH_AI_API int OH_AI_DeviceInfoGetFrequency(const OH_AI_DeviceInfoHandle device_info);
+/// \brief Obtain all device descriptions in NNRT.
+///
+/// \param[out] num Number of NNRT device descriptions.
+///
+/// \return NNRT device description array.
+OH_AI_API NNRTDeviceDesc *OH_AI_GetAllNNRTDeviceDescs(size_t *num);
+
+/// \brief Obtain the specified element in the NNRT device description array.
+///
+/// \param[in] descs NNRT device description array.
+/// \param[in] index Element index.
+///
+/// \return NNRT device description.
+OH_AI_API NNRTDeviceDesc *OH_AI_GetElementOfNNRTDeviceDescs(NNRTDeviceDesc *descs, size_t index);
+
+/// \brief Destroy the NNRT device descriptions returned by OH_AI_GetAllNNRTDeviceDescs().
+///
+/// \param[in] desc NNRT device description array.
+OH_AI_API void OH_AI_DestroyAllNNRTDeviceDescs(NNRTDeviceDesc **desc);
+
+/// \brief Obtain the device id in NNRT device description.
+///
+/// \param[in] desc Pointer to the NNRT device description instance.
+///
+/// \return NNRT device id.
+OH_AI_API size_t OH_AI_GetDeviceIdFromNNRTDeviceDesc(const NNRTDeviceDesc *desc);
+
+/// \brief Obtain the device name in NNRT device description.
+///
+/// \param[in] desc Pointer to the NNRT device description instance.
+///
+/// \return NNRT device name.
+OH_AI_API const char *OH_AI_GetNameFromNNRTDeviceDesc(const NNRTDeviceDesc *desc);
+
+/// \brief Obtain the device type in NNRT device description.
+///
+/// \param[in] desc Pointer to the NNRT device description instance.
+///
+/// \return NNRT device type.
+OH_AI_API OH_AI_NNRTDeviceType OH_AI_GetTypeFromNNRTDeviceDesc(const NNRTDeviceDesc *desc);
+
+/// \brief Create the NNRT device info by exactly matching the specific device name.
+///
+/// \param[in] name NNRT device name.
+///
+/// \return Device info object handle.
+OH_AI_API OH_AI_DeviceInfoHandle OH_AI_CreateNNRTDeviceInfoByName(const char *name);
+
+/// \brief Create the NNRT device info by finding the first device with the specific device type.
+///
+/// \param[in] type NNRT device type.
+///
+/// \return Device info object handle.
+OH_AI_API OH_AI_DeviceInfoHandle OH_AI_CreateNNRTDeviceInfoByType(OH_AI_NNRTDeviceType type);
+
+/// \brief Set the NNRT device id. Only valid for NNRT.
+///
+/// \param[in] device_info Device info object handle.
+/// \param[in] device_id NNRT device id.
+OH_AI_API void OH_AI_DeviceInfoSetDeviceId(OH_AI_DeviceInfoHandle device_info, size_t device_id);
+
+/// \brief Obtain the NNRT device id. Only valid for NNRT.
+///
+/// \param[in] device_info Device info object handle.
+///
+/// \return NNRT device id.
+OH_AI_API size_t OH_AI_DeviceInfoGetDeviceId(const OH_AI_DeviceInfoHandle device_info);
+
+/// \brief Set the NNRT performance mode. Only valid for NNRT.
+///
+/// \param[in] device_info Device info object handle.
+/// \param[in] mode NNRT performance mode.
+OH_AI_API void OH_AI_DeviceInfoSetPerformanceMode(OH_AI_DeviceInfoHandle device_info, OH_AI_PerformanceMode mode);
+
+/// \brief Obtain the NNRT performance mode. Only valid for NNRT.
+///
+/// \param[in] device_info Device info object handle.
+///
+/// \return NNRT performance mode.
+OH_AI_API OH_AI_PerformanceMode OH_AI_DeviceInfoGetPerformanceMode(const OH_AI_DeviceInfoHandle device_info);
+
+/// \brief Set the NNRT priority. Only valid for NNRT.
+///
+/// \param[in] device_info Device info object handle.
+/// \param[in] priority NNRT priority.
+OH_AI_API void OH_AI_DeviceInfoSetPriority(OH_AI_DeviceInfoHandle device_info, OH_AI_Priority priority);
+
+/// \brief Obtain the NNRT priority. Only valid for NNRT.
+///
+/// \param[in] device_info Device info object handle.
+///
+/// \return NNRT priority.
+OH_AI_API OH_AI_Priority OH_AI_DeviceInfoGetPriority(const OH_AI_DeviceInfoHandle device_info);
+
+/// \brief Add extension of key/value format to device info. Only valid for NNRT.
+///
+/// \param[in] device_info Device info object handle.
+/// \param[in] name The content of the key as a C string.
+/// \param[in] value The pointer to the value, which is a byte array.
+/// \param[in] value_size The size of the value byte array.
+///
+/// \return OH_AI_STATUS_SUCCESS if success, or detail error code if failed.
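/* Usage sketch (editorial addition, not part of the patch): the NNRT device-description
 * and device-info APIs above are typically combined when building a context. This assumes
 * the pre-existing OH_AI_ContextCreate()/OH_AI_ContextAddDeviceInfo() functions from
 * context_c.h; error handling is trimmed for brevity. */
#include <stdio.h>
#include "include/c_api/context_c.h"

OH_AI_ContextHandle CreateContextWithNNRTDevice(void) {
  OH_AI_ContextHandle context = OH_AI_ContextCreate();
  size_t num = 0;
  NNRTDeviceDesc *descs = OH_AI_GetAllNNRTDeviceDescs(&num);
  if (context == NULL || descs == NULL || num == 0) {
    return context;  /* fall back to a context without an NNRT device */
  }

  /* Inspect the first device description and create a matching device info. */
  NNRTDeviceDesc *desc = OH_AI_GetElementOfNNRTDeviceDescs(descs, 0);
  const char *name = OH_AI_GetNameFromNNRTDeviceDesc(desc);
  printf("using NNRT device %s (id=%zu)\n", name, OH_AI_GetDeviceIdFromNNRTDeviceDesc(desc));
  OH_AI_DeviceInfoHandle nnrt_info = OH_AI_CreateNNRTDeviceInfoByName(name);

  /* Tune execution options; these setters are only valid for NNRT device info. */
  OH_AI_DeviceInfoSetPerformanceMode(nnrt_info, OH_AI_PERFORMANCE_HIGH);
  OH_AI_DeviceInfoSetPriority(nnrt_info, OH_AI_PRIORITY_MEDIUM);

  OH_AI_ContextAddDeviceInfo(context, nnrt_info);
  OH_AI_DestroyAllNNRTDeviceDescs(&descs);
  return context;
}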
+OH_AI_API OH_AI_Status OH_AI_DeviceInfoAddExtension(OH_AI_DeviceInfoHandle device_info, const char *name, const char *value, size_t value_size); #ifdef __cplusplus } #endif diff --git a/include/c_api/model_c.h b/include/c_api/model_c.h index 12a46bcd..2286e673 100644 --- a/include/c_api/model_c.h +++ b/include/c_api/model_c.h @@ -26,6 +26,8 @@ extern "C" { typedef void *OH_AI_ModelHandle; +typedef void *OH_AI_TrainCfgHandle; + typedef struct OH_AI_TensorHandleArray { size_t handle_num; OH_AI_TensorHandle *handle_list; @@ -168,6 +170,182 @@ OH_AI_API OH_AI_TensorHandle OH_AI_ModelGetInputByTensorName(const OH_AI_ModelHa /// \return The output tensor handle with the given name, if the name is not found, an NULL is returned. OH_AI_API OH_AI_TensorHandle OH_AI_ModelGetOutputByTensorName(const OH_AI_ModelHandle model, const char *tensor_name); +/// \brief Create a TrainCfg object. Only valid for Lite Train. +/// +/// \return TrainCfg object handle. +OH_AI_API OH_AI_TrainCfgHandle OH_AI_TrainCfgCreate(); + +/// \brief Destroy the train_cfg object. Only valid for Lite Train. +/// +/// \param[in] train_cfg TrainCfg object handle. +OH_AI_API void OH_AI_TrainCfgDestroy(OH_AI_TrainCfgHandle *train_cfg); + +/// \brief Obtains part of the name that identify a loss kernel. Only valid for Lite Train. +/// +/// \param[in] train_cfg TrainCfg object handle. +/// \param[in] num The num of loss_name. +/// +/// \return loss_name. +OH_AI_API char **OH_AI_TrainCfgGetLossName(OH_AI_TrainCfgHandle train_cfg, size_t *num); + +/// \brief Set part of the name that identify a loss kernel. Only valid for Lite Train. +/// +/// \param[in] train_cfg TrainCfg object handle. +/// \param[in] loss_name define part of the name that identify a loss kernel. +/// \param[in] num The num of loss_name. +OH_AI_API void OH_AI_TrainCfgSetLossName(OH_AI_TrainCfgHandle train_cfg, const char **loss_name, size_t num); + +/// \brief Obtains optimization level of the train_cfg. Only valid for Lite Train. +/// +/// \param[in] train_cfg TrainCfg object handle. +/// +/// \return OH_AI_OptimizationLevel. +OH_AI_API OH_AI_OptimizationLevel OH_AI_TrainCfgGetOptimizationLevel(OH_AI_TrainCfgHandle train_cfg); + +/// \brief Set optimization level of the train_cfg. Only valid for Lite Train. +/// +/// \param[in] train_cfg TrainCfg object handle. +/// \param[in] level The optimization level of train_cfg. +OH_AI_API void OH_AI_TrainCfgSetOptimizationLevel(OH_AI_TrainCfgHandle train_cfg, OH_AI_OptimizationLevel level); + +/// \brief Build the train model from model buffer so that it can run on a device. Only valid for Lite Train. +/// +/// \param[in] model Model object handle. +/// \param[in] model_data Define the buffer read from a model file. +/// \param[in] data_size Define bytes number of model file buffer. +/// \param[in] model_type Define The type of model file. +/// \param[in] model_context Define the context used to store options during execution. +/// \param[in] train_cfg Define the config used by training. +/// +/// \return OH_AI_Status. +OH_AI_API OH_AI_Status OH_AI_TrainModelBuild(OH_AI_ModelHandle model, const void *model_data, size_t data_size, + OH_AI_ModelType model_type, const OH_AI_ContextHandle model_context, + const OH_AI_TrainCfgHandle train_cfg); + +/// \brief Build the train model from model file buffer so that it can run on a device. Only valid for Lite Train. +/// +/// \param[in] model Model object handle. +/// \param[in] model_path Define the model path. +/// \param[in] model_type Define The type of model file. 
+/// \param[in] model_context Define the context used to store options during execution.
+/// \param[in] train_cfg Define the config used by training.
+///
+/// \return OH_AI_Status.
+OH_AI_API OH_AI_Status OH_AI_TrainModelBuildFromFile(OH_AI_ModelHandle model, const char *model_path,
+                                                     OH_AI_ModelType model_type, const OH_AI_ContextHandle model_context,
+                                                     const OH_AI_TrainCfgHandle train_cfg);
+
+/// \brief Train model by step. Only valid for Lite Train.
+///
+/// \param[in] model Model object handle.
+/// \param[in] before CallBack before predict.
+/// \param[in] after CallBack after predict.
+///
+/// \return OH_AI_Status.
+OH_AI_API OH_AI_Status OH_AI_RunStep(OH_AI_ModelHandle model, const OH_AI_KernelCallBack before,
+                                     const OH_AI_KernelCallBack after);
+
+/// \brief Sets the Learning Rate of the training. Only valid for Lite Train.
+///
+/// \param[in] learning_rate The learning rate to set.
+///
+/// \return OH_AI_Status of operation.
+OH_AI_API OH_AI_Status OH_AI_ModelSetLearningRate(OH_AI_ModelHandle model, float learning_rate);
+
+/// \brief Obtains the Learning Rate of the optimizer. Only valid for Lite Train.
+///
+/// \return Learning rate. 0.0 if no optimizer was found.
+OH_AI_API float OH_AI_ModelGetLearningRate(OH_AI_ModelHandle model);
+
+/// \brief Obtains all weight tensors of the model. Only valid for Lite Train.
+///
+/// \param[in] model Model object handle.
+///
+/// \return The vector that includes all weight tensors.
+OH_AI_API OH_AI_TensorHandleArray OH_AI_ModelGetWeights(OH_AI_ModelHandle model);
+
+/// \brief Update weight tensors of the model. Only valid for Lite Train.
+///
+/// \param[in] new_weights A vector of new weights.
+///
+/// \return OH_AI_Status
+OH_AI_API OH_AI_Status OH_AI_ModelUpdateWeights(OH_AI_ModelHandle model, const OH_AI_TensorHandleArray new_weights);
+
+/// \brief Get the model running mode.
+///
+/// \param[in] model Model object handle.
+///
+/// \return Whether the model runs in Train Mode or not.
+OH_AI_API bool OH_AI_ModelGetTrainMode(OH_AI_ModelHandle model);
+
+/// \brief Set the model running mode. Only valid for Lite Train.
+///
+/// \param[in] model Model object handle.
+/// \param[in] train True means model runs in Train Mode, otherwise Eval Mode.
+///
+/// \return OH_AI_Status.
+OH_AI_API OH_AI_Status OH_AI_ModelSetTrainMode(OH_AI_ModelHandle model, bool train);
+
+/// \brief Setup training with virtual batches. Only valid for Lite Train.
+///
+/// \param[in] model Model object handle.
+/// \param[in] virtual_batch_multiplier - virtual batch multiplier, use any number < 1 to disable.
+/// \param[in] lr - learning rate to use for virtual batch, -1 for internal configuration.
+/// \param[in] momentum - batch norm momentum to use for virtual batch, -1 for internal configuration.
+///
+/// \return OH_AI_Status.
+OH_AI_API OH_AI_Status OH_AI_ModelSetupVirtualBatch(OH_AI_ModelHandle model, int virtual_batch_multiplier, float lr,
+                                                    float momentum);
+
+/// \brief Export training model to file. Only valid for Lite Train.
+///
+/// \param[in] model Model object handle.
+/// \param[in] model_type The model file type.
+/// \param[in] model_file The path of the exported model file.
+/// \param[in] quantization_type The quantization type.
+/// \param[in] export_inference_only Whether to export an inference-only model.
+/// \param[in] output_tensor_name Names of the output tensors of the exported inference model; default is
+/// empty, which exports the complete inference model.
+/// \param[in] num The number of output_tensor_name.
+///
+/// \return OH_AI_Status.
+OH_AI_API OH_AI_Status OH_AI_ExportModel(OH_AI_ModelHandle model, OH_AI_ModelType model_type, const char *model_file,
+                                         OH_AI_QuantizationType quantization_type, bool export_inference_only,
+                                         char **output_tensor_name, size_t num);
+
+/// \brief Export training model to buffer. Only valid for Lite Train.
+///
+/// \param[in] model Model object handle.
+/// \param[in] model_type The model file type.
+/// \param[in] model_data The exported model buffer.
+/// \param[in] data_size The exported model buffer size.
+/// \param[in] quantization_type The quantization type.
+/// \param[in] export_inference_only Whether to export an inference-only model.
+/// \param[in] output_tensor_name Names of the output tensors of the exported inference model; default is
+/// empty, which exports the complete inference model.
+/// \param[in] num The number of output_tensor_name.
+///
+/// \return OH_AI_Status.
+OH_AI_API OH_AI_Status OH_AI_ExportModelBuffer(OH_AI_ModelHandle model, OH_AI_ModelType model_type, char **model_data,
+                                               size_t *data_size, OH_AI_QuantizationType quantization_type,
+                                               bool export_inference_only, char **output_tensor_name, size_t num);
+
+/// \brief Export model's weights, which can be used in micro only. Only valid for Lite Train.
+///
+/// \param[in] model Model object handle.
+/// \param[in] model_type The model file type.
+/// \param[in] weight_file The path of the exported weight file.
+/// \param[in] is_inference Whether to export weights from an inference model. Currently, only `true` is supported.
+/// \param[in] enable_fp16 Whether to save float weights in float16 format.
+/// \param[in] changeable_weights_name Names of the weight tensors whose shapes are changeable.
+/// \param[in] num The number of changeable_weights_name.
+///
+/// \return OH_AI_Status.
+OH_AI_API OH_AI_Status OH_AI_ExportWeightsCollaborateWithMicro(OH_AI_ModelHandle model, OH_AI_ModelType model_type,
+                                                               const char *weight_file, bool is_inference,
+                                                               bool enable_fp16, char **changeable_weights_name,
+                                                               size_t num);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/include/c_api/tensor_c.h b/include/c_api/tensor_c.h
index f18ba163..6d2aaab6 100644
--- a/include/c_api/tensor_c.h
+++ b/include/c_api/tensor_c.h
@@ -17,6 +17,7 @@ #define MINDSPORE_INCLUDE_C_API_TENSOE_C_H
 #include
+#include "include/c_api/status_c.h"
 #include "include/c_api/types_c.h"
 #include "include/c_api/data_type_c.h"
 #include "include/c_api/format_c.h"
@@ -112,6 +113,19 @@ OH_AI_API OH_AI_Format OH_AI_TensorGetFormat(const OH_AI_TensorHandle tensor);
 /// \param[in] data A pointer to the data of the tensor.
 OH_AI_API void OH_AI_TensorSetData(OH_AI_TensorHandle tensor, void *data);
+
+/// \brief Set the data for the tensor with a user-allocated data buffer.
+/// The main purpose of this interface is to allow memory already allocated by the user to be used as the Model's
+/// input, rather than memory allocated inside the Model object, which saves one copy.
+/// Note: The tensor won't free the data provided by the invoker. The invoker is responsible for freeing it, and this
+/// free action must not be performed before destruction of the tensor.
+///
+/// \param[in] tensor Tensor object handle.
+/// \param[in] data A pointer to the user data buffer.
+/// \param[in] data_size The byte size of the user data buffer.
+///
+/// \return OH_AI_STATUS_SUCCESS if success, or detail error code if failed.
+OH_AI_API OH_AI_Status OH_AI_TensorSetUserData(OH_AI_TensorHandle tensor, void *data, size_t data_size);
+
 /// \brief Obtain the data pointer of the tensor.
/// /// \param[in] tensor Tensor object handle. diff --git a/include/c_api/types_c.h b/include/c_api/types_c.h index dba54ffa..e520e336 100644 --- a/include/c_api/types_c.h +++ b/include/c_api/types_c.h @@ -40,10 +40,65 @@ typedef enum OH_AI_DeviceType { OH_AI_DEVICETYPE_KIRIN_NPU, // add new type here // ohos-only device range: [60, 80) - OH_AI_DEVICETYPE__NNRT = 60, + OH_AI_DEVICETYPE_NNRT = 60, OH_AI_DEVICETYPE_INVALID = 100, } OH_AI_DeviceType; +typedef enum OH_AI_NNRTDeviceType { + /** Devices that are not CPU, GPU, or dedicated accelerator */ + OH_AI_NNRTDEVICE_OTHERS = 0, + /** CPU device */ + OH_AI_NNRTDEVICE_CPU = 1, + /** GPU device */ + OH_AI_NNRTDEVICE_GPU = 2, + /** Dedicated hardware accelerator */ + OH_AI_NNRTDEVICE_ACCELERATOR = 3, +} OH_AI_NNRTDeviceType; + +typedef enum OH_AI_PerformanceMode { + /** No performance mode preference */ + OH_AI_PERFORMANCE_NONE = 0, + /** Low power consumption mode*/ + OH_AI_PERFORMANCE_LOW = 1, + /** Medium performance mode */ + OH_AI_PERFORMANCE_MEDIUM = 2, + /** High performance mode */ + OH_AI_PERFORMANCE_HIGH = 3, + /** Ultimate performance mode */ + OH_AI_PERFORMANCE_EXTREME = 4 +} OH_AI_PerformanceMode; + +typedef enum OH_AI_Priority { + /** No priority preference */ + OH_AI_PRIORITY_NONE = 0, + /** Low priority */ + OH_AI_PRIORITY_LOW = 1, + /** Medium priority */ + OH_AI_PRIORITY_MEDIUM = 2, + /** High priority */ + OH_AI_PRIORITY_HIGH = 3 +} OH_AI_Priority; + +typedef enum OH_AI_OptimizationLevel { + /** Do not change */ + OH_AI_KO0 = 0, + /** Cast network to float16, keep batchnorm and loss in float32 */ + OH_AI_KO2 = 2, + /** Cast network to float16, including bacthnorm */ + OH_AI_KO3 = 3, + /** Choose optimization based on device */ + OH_AI_KAUTO = 4, + OH_AI_KOPTIMIZATIONTYPE = 0xFFFFFFFF +} OH_AI_OptimizationLevel; + +typedef enum OH_AI_QuantizationType { + OH_AI_NO_QUANT = 0, + OH_AI_WEIGHT_QUANT = 1, + OH_AI_FULL_QUANT = 2, + OH_AI_UNKNOWN_QUANT_TYPE = 0xFFFFFFFF +} OH_AI_QuantizationType; + +typedef struct NNRTDeviceDesc NNRTDeviceDesc; #ifdef __cplusplus } #endif diff --git a/include/sdk_api/context.h b/include/sdk_api/context.h index 5bfc9279..e12b8d6f 100644 --- a/include/sdk_api/context.h +++ b/include/sdk_api/context.h @@ -174,6 +174,109 @@ OH_AI_API void OH_AI_DeviceInfoSetFrequency(OH_AI_DeviceInfoHandle device_info, /// \return NPU frequency OH_AI_API int OH_AI_DeviceInfoGetFrequency(const OH_AI_DeviceInfoHandle device_info); +/// \brief Obtain the all device descriptions in NNRT. +/// +/// \param[out] num Number of NNRT device description. +/// +/// \return NNRT device description array. +OH_AI_API NNRTDeviceDesc *OH_AI_GetAllNNRTDeviceDescs(size_t *num); + +/// \brief Obtain the specified element in NNRt device description array. +/// +/// \param[in] descs NNRT device description array. +/// \param[in] index Element index. +/// +/// \return NNRT device description. +OH_AI_API NNRTDeviceDesc *OH_AI_GetElementOfNNRTDeviceDescs(NNRTDeviceDesc *descs, size_t index); + +/// \brief Destroy the NNRT device descriptions returned by OH_AI_NNRTGetAllDeviceDescs(). +/// +/// \param[in] desc NNRT device description array. +OH_AI_API void OH_AI_DestroyAllNNRTDeviceDescs(NNRTDeviceDesc **desc); + +/// \brief Obtain the device id in NNRT device description. +/// +/// \param[in] desc pointer to the NNRT device description instance. +/// +/// \return NNRT device id. +OH_AI_API size_t OH_AI_GetDeviceIdFromNNRTDeviceDesc(const NNRTDeviceDesc *desc); + +/// \brief Obtain the device name in NNRT device description. 
+/// +/// \param[in] desc pointer to the NNRT device description instance. +/// +/// \return NNRT device name. +OH_AI_API const char *OH_AI_GetNameFromNNRTDeviceDesc(const NNRTDeviceDesc *desc); + +/// \brief Obtain the device type in NNRT device description. +/// +/// \param[in] desc pointer to the NNRT device description instance. +/// +/// \return NNRT device type. +OH_AI_API OH_AI_NNRTDeviceType OH_AI_GetTypeFromNNRTDeviceDesc(const NNRTDeviceDesc *desc); + +/// \brief Create the NNRT device info by exactly matching the specific device name. +/// +/// \param[in] name NNRt device name. +/// +/// \return Device info object handle. +OH_AI_API OH_AI_DeviceInfoHandle OH_AI_CreateNNRTDeviceInfoByName(const char *name); + +/// \brief Create the NNRT device info by finding the first device with the specific device type. +/// +/// \param[in] name NNRt device type. +/// +/// \return Device info object handle. +OH_AI_API OH_AI_DeviceInfoHandle OH_AI_CreateNNRTDeviceInfoByType(OH_AI_NNRTDeviceType type); + +/// \brief Set the NNRT device id, Only valid for NNRT. +/// +/// \param[in] device_info Device info object handle. +/// \param[in] device_id NNRT device id. +OH_AI_API void OH_AI_DeviceInfoSetDeviceId(OH_AI_DeviceInfoHandle device_info, size_t device_id); + +/// \brief Obtain the NNRT device id, Only valid for NNRT. +/// +/// \param[in] device_info Device info object handle. +/// +/// \return NNRT device id. +OH_AI_API size_t OH_AI_DeviceInfoGetDeviceId(const OH_AI_DeviceInfoHandle device_info); + +/// \brief Set the NNRT performance mode, Only valid for NNRT. +/// +/// \param[in] device_info Device info object handle. +/// \param[in] device_id NNRT performance mode. +OH_AI_API void OH_AI_DeviceInfoSetPerformanceMode(OH_AI_DeviceInfoHandle device_info, OH_AI_PerformanceMode mode); + +/// \brief Obtain the NNRT performance mode, Only valid for NNRT. +/// +/// \param[in] device_info Device info object handle. +/// +/// \return NNRT performance mode. +OH_AI_API OH_AI_PerformanceMode OH_AI_DeviceInfoGetPerformanceMode(const OH_AI_DeviceInfoHandle device_info); + +/// \brief Set the NNRT priority, Only valid for NNRT. +/// +/// \param[in] device_info Device info object handle. +/// \param[in] device_id NNRT priority. +OH_AI_API void OH_AI_DeviceInfoSetPriority(OH_AI_DeviceInfoHandle device_info, OH_AI_Priority priority); + +/// \brief Obtain the NNRT priority, Only valid for NNRT. +/// +/// \param[in] device_info Device info object handle. +/// +/// \return NNRT priority. +OH_AI_API OH_AI_Priority OH_AI_DeviceInfoGetPriority(const OH_AI_DeviceInfoHandle device_info); + +/// \brief Add extension of key/value format to device info, Only valid for NNRT. +/// +/// \param[in] device_info Device info object handle. +/// \param[in] name The content of key as a C string. +/// \param[in] value The pointer to the value, which is a byte array. +/// \param[in] value_size The size of the value, which is a byte array. +/// +/// \return OH_AI_STATUS_SUCCESS if success, or detail error code if failed. 
+OH_AI_API OH_AI_Status OH_AI_DeviceInfoAddExtension(OH_AI_DeviceInfoHandle device_info, const char *name, const char *value, size_t value_size); #ifdef __cplusplus } #endif diff --git a/include/sdk_api/tensor.h b/include/sdk_api/tensor.h index f6ba02cd..3dad04ac 100644 --- a/include/sdk_api/tensor.h +++ b/include/sdk_api/tensor.h @@ -17,6 +17,7 @@ #define MINDSPORE_INCLUDE_C_API_TENSOE_C_H #include +#include "mindspore/status.h" #include "mindspore/types.h" #include "mindspore/data_type.h" #include "mindspore/format.h" @@ -140,6 +141,18 @@ OH_AI_API int64_t OH_AI_TensorGetElementNum(const OH_AI_TensorHandle tensor); /// \return The data size of the tensor. OH_AI_API size_t OH_AI_TensorGetDataSize(const OH_AI_TensorHandle tensor); +/// \brief Set the data for the tensor with user-allocated data buffer. +/// The main purpose of this interface is providing a way of using memory already allocated by user as the Model's +/// input, but not which allocated inside the Model object. It can reduce one copy. +/// Note: The tensor won't free the data provided by invoker. Invoker has the responsibility to free it. And this +/// free action should not be preformed before destruction of the tensor. +/// +/// \param[in] tensor Tensor object handle. +/// \param[in] data A pointer to the user data buffer. +/// \param[in] data the byte size of the user data buffer. +/// +/// \return OH_AI_STATUS_SUCCESS if success, or detail error code if failed. +OH_AI_API OH_AI_Status OH_AI_TensorSetUserData(OH_AI_TensorHandle tensor, void *data, size_t data_size); #ifdef __cplusplus } #endif diff --git a/include/sdk_api/types.h b/include/sdk_api/types.h index a39c6daa..d38660b0 100644 --- a/include/sdk_api/types.h +++ b/include/sdk_api/types.h @@ -40,10 +40,46 @@ typedef enum OH_AI_DeviceType { OH_AI_DEVICETYPE_KIRIN_NPU, // add new type here // ohos-only device range: [60, 80) - OH_AI_DeviceType_NNRT = 60, + OH_AI_DEVICETYPE_NNRT = 60, OH_AI_DEVICETYPE_INVALID = 100, } OH_AI_DeviceType; +typedef enum OH_AI_NNRTDeviceType { + /** Devices that are not CPU, GPU, or dedicated accelerator */ + OH_AI_NNRTDEVICE_OTHERS = 0, + /** CPU device */ + OH_AI_NNRTDEVICE_CPU = 1, + /** GPU device */ + OH_AI_NNRTDEVICE_GPU = 2, + /** Dedicated hardware accelerator */ + OH_AI_NNRTDEVICE_ACCELERATOR = 3, +} OH_AI_NNRTDeviceType; + +typedef enum OH_AI_PerformanceMode { + /** No performance mode preference */ + OH_AI_PERFORMANCE_NONE = 0, + /** Low power consumption mode*/ + OH_AI_PERFORMANCE_LOW = 1, + /** Medium performance mode */ + OH_AI_PERFORMANCE_MEDIUM = 2, + /** High performance mode */ + OH_AI_PERFORMANCE_HIGH = 3, + /** Ultimate performance mode */ + OH_AI_PERFORMANCE_EXTREME = 4 +} OH_AI_PerformanceMode; + +typedef enum OH_AI_Priority { + /** No priority preference */ + OH_AI_PRIORITY_NONE = 0, + /** Low priority */ + OH_AI_PRIORITY_LOW = 1, + /** Medium priority */ + OH_AI_PRIORITY_MEDIUM = 2, + /** High priority */ + OH_AI_PRIORITY_HIGH = 3 +} OH_AI_Priority; + +typedef struct NNRTDeviceDesc NNRTDeviceDesc; #ifdef __cplusplus } #endif diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/BUILD.gn b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/BUILD.gn index 7bbc3782..103e53b7 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/BUILD.gn +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/BUILD.gn @@ -498,6 +498,9 @@ infer_shape_sources = [ "infer/crop_infer.c", "infer/cumsum_infer.c", "infer/custom_gru_infer.c", + "infer/custom_masked_fill_infer.c", + "infer/custom_is_inf_infer.c", + 
"infer/custom_tensor_scatter_max_infer.c", "infer/decoder_layer_infer.c", "infer/deconv2d_infer.c", "infer/depth_to_space_infer.c", diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/CMakeLists.txt b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/CMakeLists.txt index c1685a65..6fef44fd 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/CMakeLists.txt +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/CMakeLists.txt @@ -238,7 +238,7 @@ endif() if(PLATFORM_ARM) set(NO_FAST_MATH_OPTI ${NNACL_DIR}/fp32/resize_fp32.c) set_source_files_properties(${NO_FAST_MATH_OPTI} PROPERTIES LANGUAGE C - COMPILE_FLAGS "${CMAKE_C_FLAGS} -fno-fast-math") + COMPILE_FLAGS "${CMAKE_C_FLAGS} -w -fno-fast-math") endif() add_library(nnacl_mid OBJECT ${KERNEL_SRC} ${TRAIN_SRC} ${ASSEMBLY_SRC} ${MS_X86_SIMD_SRC}) diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/avx/scatter_nd_binary_avx.h b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/avx/scatter_nd_binary_avx.h new file mode 100644 index 00000000..14bd1d76 --- /dev/null +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/avx/scatter_nd_binary_avx.h @@ -0,0 +1,66 @@ +/** +* Copyright 2023 Huawei Technologies Co., Ltd +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ +#ifndef NNACL_BASE_SCATTER_ND_BINARY_AVX_H_ +#define NNACL_BASE_SCATTER_ND_BINARY_AVX_H_ + +#include "nnacl/intrinsics/ms_simd_instructions.h" +#include "nnacl/intrinsics/ms_simd_avx_instructions.h" + +#ifdef __cplusplus +extern "C" { +#endif +#pragma GCC push_options +#pragma GCC target("avx", "avx2") +#define MS_SIMD_INSTRUCTION MS_SIMD_AVX_INSTRUCTION +#define BLOCK_NUM 8 +#define MS_SIMD_AVX + +static inline int ScatterNDAddFp32AVX(int index, const float *update, int size, float *output) { + for (int block_max_size = size - BLOCK_NUM + 1; index < block_max_size; index += BLOCK_NUM) { + SIMD_ST_F32(output + index, SIMD_ADD_F32(SIMD_LD_F32(output + index), SIMD_LD_F32(update + index))); + } + return index; +} + +static inline int ScatterNDAddInt32AVX(int index, const int *update, int size, int *output) { + for (int block_max_size = size - BLOCK_NUM + 1; index < block_max_size; index += BLOCK_NUM) { + SIMD_ST_EPI32(output + index, SIMD_ADD_EPI32(SIMD_LD_EPI32(output + index), SIMD_LD_EPI32(update + index))); + } + return index; +} + +static inline int ScatterNDMaxFp32AVX(int index, const float *update, int size, float *output) { + for (int block_max_size = size - BLOCK_NUM + 1; index < block_max_size; index += BLOCK_NUM) { + SIMD_ST_F32(output + index, SIMD_MAX_F32(SIMD_LD_F32(output + index), SIMD_LD_F32(update + index))); + } + return index; +} + +static inline int ScatterNDMaxInt32AVX(int index, const int *update, int size, int *output) { + for (int block_max_size = size - BLOCK_NUM + 1; index < block_max_size; index += BLOCK_NUM) { + SIMD_ST_EPI32(output + index, SIMD_MAX_EPI32(SIMD_LD_EPI32(output + index), SIMD_LD_EPI32(update + index))); + } + return index; +} + +#undef MS_SIMD_INSTRUCTION +#undef BLOCK_NUM +#pragma GCC pop_options +#undef MS_SIMD_AVX +#ifdef 
__cplusplus +} +#endif +#endif // NNACL_BASE_SCATTER_ND_BINARY_AVX_H_ diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/avx512/scatter_nd_binary_avx512.h b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/avx512/scatter_nd_binary_avx512.h new file mode 100644 index 00000000..abf024c5 --- /dev/null +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/avx512/scatter_nd_binary_avx512.h @@ -0,0 +1,66 @@ +/** +* Copyright 2023 Huawei Technologies Co., Ltd +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ +#ifndef NNACL_BASE_SCATTER_ND_BINARY_AVX512_H_ +#define NNACL_BASE_SCATTER_ND_BINARY_AVX512_H_ + +#include "nnacl/intrinsics/ms_simd_instructions.h" +#include "nnacl/intrinsics/ms_simd_avx512_instructions.h" + +#ifdef __cplusplus +extern "C" { +#endif +#pragma GCC push_options +#pragma GCC target("avx512f") +#define MS_SIMD_INSTRUCTION MS_SIMD_AVX512_INSTRUCTION +#define BLOCK_NUM 16 +#define MS_SIMD_AVX512 + +static inline int ScatterNDAddFp32AVX512(int index, const float *update, int size, float *output) { + for (int block_max_size = size - BLOCK_NUM + 1; index < block_max_size; index += BLOCK_NUM) { + SIMD_ST_F32(output + index, SIMD_ADD_F32(SIMD_LD_F32(output + index), SIMD_LD_F32(update + index))); + } + return index; +} + +static inline int ScatterNDAddInt32AVX512(int index, const int *update, int size, int *output) { + for (int block_max_size = size - BLOCK_NUM + 1; index < block_max_size; index += BLOCK_NUM) { + SIMD_ST_EPI32(output + index, SIMD_ADD_EPI32(SIMD_LD_EPI32(output + index), SIMD_LD_EPI32(update + index))); + } + return index; +} + +static inline int ScatterNDMaxFp32AVX512(int index, const float *update, int size, float *output) { + for (int block_max_size = size - BLOCK_NUM + 1; index < block_max_size; index += BLOCK_NUM) { + SIMD_ST_F32(output + index, SIMD_MAX_F32(SIMD_LD_F32(output + index), SIMD_LD_F32(update + index))); + } + return index; +} + +static inline int ScatterNDMaxInt32AVX512(int index, const int *update, int size, int *output) { + for (int block_max_size = size - BLOCK_NUM + 1; index < block_max_size; index += BLOCK_NUM) { + SIMD_ST_EPI32(output + index, SIMD_MAX_EPI32(SIMD_LD_EPI32(output + index), SIMD_LD_EPI32(update + index))); + } + return index; +} + +#undef MS_SIMD_INSTRUCTION +#undef BLOCK_NUM +#pragma GCC pop_options +#undef MS_SIMD_AVX512 +#ifdef __cplusplus +} +#endif +#endif // NNACL_BASE_SCATTER_ND_BINARY_AVX512_H_ diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/base/scatter_nd_binary.c b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/base/scatter_nd_binary.c index bca71f55..e496bb4b 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/base/scatter_nd_binary.c +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/base/scatter_nd_binary.c @@ -77,3 +77,31 @@ int ScatterNDUpdate(void *output, const void *update, int *output_unit_offsets, } return NNACL_OK; } + +int ScatterNDMax(const void *update, void *output, int *output_unit_offsets, const ScatterNDParameter *param, int type, + int task_id) { + if (update == NULL || 
output == NULL || output_unit_offsets == NULL || param == NULL) { + return NNACL_NULL_PTR; + } + if (param->op_parameter.thread_num_ == 0) { + return NNACL_ERR; + } + int unit_per_thread = UP_DIV(param->num_unit, param->op_parameter.thread_num_); + int begin = unit_per_thread * task_id; + int end = MSMIN(begin + unit_per_thread, param->num_unit); + if (type == 0) { + float *update_fp32 = (float *)update; + float *output_fp32 = (float *)output; + for (int i = begin; i < end; i++) { + const float *update_data = update_fp32 + i * param->unit_size; + float *output_data = output_fp32 + output_unit_offsets[i]; + int j = 0; + + SIMD_RUN_NO_SCALAR(ScatterNDMaxFp32, j, update_data, param->unit_size, output_data); + for (; j < param->unit_size; j++) { + output_data[j] = fmaxf(update_data[j], output_data[j]); + } + } + } + return NNACL_OK; +} diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/base/scatter_nd_binary.h b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/base/scatter_nd_binary.h index 3af55335..36657cd9 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/base/scatter_nd_binary.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/base/scatter_nd_binary.h @@ -27,6 +27,9 @@ int ScatterNDUpdate(void *output, const void *update, int *output_unit_offsets, int ScatterNDAdd(const void *update, void *output, int *output_unit_offsets, const ScatterNDParameter *param, int type, int task_id); + +int ScatterNDMax(const void *update, void *output, int *output_unit_offsets, const ScatterNDParameter *param, int type, + int task_id); #ifdef __cplusplus } #endif diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/base/scatter_nd_binary_simd.h.in b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/base/scatter_nd_binary_simd.h.in index c72d9cc2..46bb20ce 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/base/scatter_nd_binary_simd.h.in +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/base/scatter_nd_binary_simd.h.in @@ -38,6 +38,20 @@ static inline int ScatterNDAddInt32@SIMD_INSTRUCTION@(int index, const int *upda return index; } +static inline int ScatterNDMaxFp32@SIMD_INSTRUCTION@(int index, const float *update, int size, float *output) { +for (int block_max_size = size - BLOCK_NUM + 1; index < block_max_size; index += BLOCK_NUM) { +SIMD_ST_F32(output + index, SIMD_MAX_F32(SIMD_LD_F32(output + index), SIMD_LD_F32(update + index))); +} +return index; +} + +static inline int ScatterNDMaxInt32@SIMD_INSTRUCTION@(int index, const int *update, int size, int *output) { +for (int block_max_size = size - BLOCK_NUM + 1; index < block_max_size; index += BLOCK_NUM) { +SIMD_ST_EPI32(output + index, SIMD_MAX_EPI32(SIMD_LD_EPI32(output + index), SIMD_LD_EPI32(update + index))); +} +return index; +} + @SIMD_INSTRUCTION_END@ #ifdef __cplusplus } diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/custom_is_inf_parameter.h b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/custom_is_inf_parameter.h new file mode 100644 index 00000000..e1eae394 --- /dev/null +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/custom_is_inf_parameter.h @@ -0,0 +1,26 @@ +/** + * Copyright 2023 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_NNACL_CUSTOM_IS_INF_PARAMETER_H_ +#define MINDSPORE_NNACL_CUSTOM_IS_INF_PARAMETER_H_ + +#include "nnacl/op_base.h" + +typedef struct CustomIsInfParameter { + // Primitive parameter + OpParameter op_parameter_; +} CustomIsInfParameter; + +#endif // MINDSPORE_NNACL_CUSTOM_IS_INF_PARAMETER_H_ diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/custom_masked_fill_parameter.h b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/custom_masked_fill_parameter.h new file mode 100644 index 00000000..047d3d3f --- /dev/null +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/custom_masked_fill_parameter.h @@ -0,0 +1,26 @@ +/** + * Copyright 2023 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_NNACL_CUSTOM_MASKED_FILL_PARAMETER_H_ +#define MINDSPORE_NNACL_CUSTOM_MASKED_FILL_PARAMETER_H_ + +#include "nnacl/op_base.h" + +typedef struct CustomMaskedFillParameter { + // Primitive parameter + OpParameter op_parameter_; +} CustomMaskedFillParameter; + +#endif // MINDSPORE_NNACL_CUSTOM_MASKED_FILL_PARAMETER_H_ diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/custom_tensor_scatter_max_parameter.h b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/custom_tensor_scatter_max_parameter.h new file mode 100644 index 00000000..ba6940db --- /dev/null +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/custom_tensor_scatter_max_parameter.h @@ -0,0 +1,26 @@ +/** + * Copyright 2023 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_NNACL_CUSTOM_TENSOR_SCATTER_MAX_PARAMETER_H_ +#define MINDSPORE_NNACL_CUSTOM_TENSOR_SCATTER_MAX_PARAMETER_H_ + +#include "nnacl/op_base.h" + +typedef struct CustomTensorScatterMaxParameter { + // Primitive parameter + OpParameter op_parameter_; +} CustomTensorScatterMaxParameter; + +#endif // MINDSPORE_NNACL_CUSTOM_TENSOR_SCATTER_MAX_PARAMETER_H_ diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/custom_is_inf_infer.c b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/custom_is_inf_infer.c new file mode 100644 index 00000000..fc87d157 --- /dev/null +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/custom_is_inf_infer.c @@ -0,0 +1,38 @@ +/** + * Copyright 2023 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "nnacl/infer/custom_is_inf_infer.h" +#include "nnacl/infer/infer_register.h" + +int CustomIsInfInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, size_t outputs_size, + OpParameter *parameter) { + int check_ret = CheckAugmentNullSize(inputs, inputs_size, outputs, outputs_size, parameter, C1NUM, C1NUM); + if (check_ret != NNACL_OK) { + return check_ret; + } + + const TensorC *input = inputs[0]; + TensorC *output = outputs[0]; + output->data_type_ = kNumberTypeBool; + output->format_ = input->format_; + if (!InferFlag(inputs, inputs_size)) { + return NNACL_INFER_INVALID; + } + SetShapeTensor(output, input); + return NNACL_OK; +} + +REG_INFER(CustomIsInf, PrimType_Inner_CustomIsInf, CustomIsInfInferShape) diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/custom_is_inf_infer.h b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/custom_is_inf_infer.h new file mode 100644 index 00000000..d1b4b33d --- /dev/null +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/custom_is_inf_infer.h @@ -0,0 +1,31 @@ +/** + * Copyright 2023 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_NNACL_CUSTOM_IS_INF_INFER_H +#define MINDSPORE_NNACL_CUSTOM_IS_INF_INFER_H + +#include "nnacl/infer/common_infer.h" + +#ifdef __cplusplus +extern "C" { +#endif + +int CustomIsInfInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, size_t outputs_size, + OpParameter *parameter); + +#ifdef __cplusplus +} +#endif +#endif // MINDSPORE_NNACL_CUSTOM_IS_INF_INFER_H diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/custom_masked_fill_infer.c b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/custom_masked_fill_infer.c new file mode 100644 index 00000000..957a4d4f --- /dev/null +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/custom_masked_fill_infer.c @@ -0,0 +1,37 @@ +/** + * Copyright 2023 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "nnacl/infer/custom_masked_fill_infer.h" +#include "nnacl/infer/infer_register.h" + +int CustomMaskedFillInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, size_t outputs_size, + OpParameter *parameter) { + int check_ret = CheckAugmentNullSize(inputs, inputs_size, outputs, outputs_size, parameter, C3NUM, C1NUM); + if (check_ret != NNACL_OK) { + return check_ret; + } + + const TensorC *input = inputs[0]; + TensorC *output = outputs[0]; + SetDataTypeFormat(output, input); + if (!InferFlag(inputs, inputs_size)) { + return NNACL_INFER_INVALID; + } + SetShapeTensor(output, input); + return NNACL_OK; +} + +REG_INFER(CustomMaskedFill, PrimType_Inner_CustomMaskedFill, CustomMaskedFillInferShape) diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/custom_masked_fill_infer.h b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/custom_masked_fill_infer.h new file mode 100644 index 00000000..a8adbae2 --- /dev/null +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/custom_masked_fill_infer.h @@ -0,0 +1,31 @@ +/** + * Copyright 2023 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_NNACL_CUSTOM_MASKED_FILL_INFER_H +#define MINDSPORE_NNACL_CUSTOM_MASKED_FILL_INFER_H + +#include "nnacl/infer/common_infer.h" + +#ifdef __cplusplus +extern "C" { +#endif + +int CustomMaskedFillInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, size_t outputs_size, + OpParameter *parameter); + +#ifdef __cplusplus +} +#endif +#endif // MINDSPORE_NNACL_CUSTOM_MASKED_FILL_INFER_H diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/custom_tensor_scatter_max_infer.c b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/custom_tensor_scatter_max_infer.c new file mode 100644 index 00000000..be6716ba --- /dev/null +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/custom_tensor_scatter_max_infer.c @@ -0,0 +1,37 @@ +/** + * Copyright 2023 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "nnacl/infer/custom_tensor_scatter_max_infer.h" +#include "nnacl/infer/infer_register.h" + +int CustomTensorScatterMaxInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, + size_t outputs_size, OpParameter *parameter) { + int check_ret = CheckAugmentNullSize(inputs, inputs_size, outputs, outputs_size, parameter, C3NUM, C1NUM); + if (check_ret != NNACL_OK) { + return check_ret; + } + + const TensorC *input = inputs[0]; + TensorC *output = outputs[0]; + SetDataTypeFormat(output, input); + if (!InferFlag(inputs, inputs_size)) { + return NNACL_INFER_INVALID; + } + SetShapeTensor(output, input); + return NNACL_OK; +} + +REG_INFER(CustomTensorScatterMax, PrimType_Inner_CustomTensorScatterMax, CustomTensorScatterMaxInferShape) diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/custom_tensor_scatter_max_infer.h b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/custom_tensor_scatter_max_infer.h new file mode 100644 index 00000000..641aa483 --- /dev/null +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/custom_tensor_scatter_max_infer.h @@ -0,0 +1,31 @@ +/** + * Copyright 2023 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_NNACL_CUSTOM_TENSOR_SCATTER_MAX_INFER_H +#define MINDSPORE_NNACL_CUSTOM_TENSOR_SCATTER_MAX_INFER_H + +#include "nnacl/infer/common_infer.h" + +#ifdef __cplusplus +extern "C" { +#endif + +int CustomTensorScatterMaxInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, + size_t outputs_size, OpParameter *parameter); + +#ifdef __cplusplus +} +#endif +#endif // MINDSPORE_NNACL_CUSTOM_TENSOR_SCATTER_MAX_INFER_H diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/neon/scatter_nd_binary_neon.h b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/neon/scatter_nd_binary_neon.h new file mode 100644 index 00000000..d7c34768 --- /dev/null +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/neon/scatter_nd_binary_neon.h @@ -0,0 +1,65 @@ +/** +* Copyright 2023 Huawei Technologies Co., Ltd +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ +#ifndef NNACL_BASE_SCATTER_ND_BINARY_NEON_H_ +#define NNACL_BASE_SCATTER_ND_BINARY_NEON_H_ + +#include "nnacl/intrinsics/ms_simd_instructions.h" +#include "nnacl/intrinsics/ms_simd_neon_instructions.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define MS_SIMD_INSTRUCTION MS_SIMD_NEON_INSTRUCTION +#define BLOCK_NUM 4 +#define MS_SIMD_NEON + +static inline int ScatterNDAddFp32NEON(int index, const float *update, int size, float *output) { + for (int block_max_size = size - BLOCK_NUM + 1; index < block_max_size; index += BLOCK_NUM) { + SIMD_ST_F32(output + index, SIMD_ADD_F32(SIMD_LD_F32(output + index), SIMD_LD_F32(update + index))); + } + return index; +} + +static inline int ScatterNDAddInt32NEON(int index, const int *update, int size, int *output) { + for (int block_max_size = size - BLOCK_NUM + 1; index < block_max_size; index += BLOCK_NUM) { + SIMD_ST_EPI32(output + index, SIMD_ADD_EPI32(SIMD_LD_EPI32(output + index), SIMD_LD_EPI32(update + index))); + } + return index; +} + +static inline int ScatterNDMaxFp32NEON(int index, const float *update, int size, float *output) { + for (int block_max_size = size - BLOCK_NUM + 1; index < block_max_size; index += BLOCK_NUM) { + SIMD_ST_F32(output + index, SIMD_MAX_F32(SIMD_LD_F32(output + index), SIMD_LD_F32(update + index))); + } + return index; +} + +static inline int ScatterNDMaxInt32NEON(int index, const int *update, int size, int *output) { + for (int block_max_size = size - BLOCK_NUM + 1; index < block_max_size; index += BLOCK_NUM) { + SIMD_ST_EPI32(output + index, SIMD_MAX_EPI32(SIMD_LD_EPI32(output + index), SIMD_LD_EPI32(update + index))); + } + return index; +} + +#undef MS_SIMD_INSTRUCTION +#undef BLOCK_NUM + +#undef MS_SIMD_NEON +#ifdef __cplusplus +} +#endif +#endif // NNACL_BASE_SCATTER_ND_BINARY_NEON_H_ diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/op_base.h b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/op_base.h index 955a70a5..895f7e3d 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/op_base.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/op_base.h @@ -558,6 +558,10 @@ enum PrimType { PrimType_Inner_CustomGru = 10010, 
PrimType_Inner_CastGatherReduceFusion = 10011, PrimType_Inner_ReduceConcatFusion = 10012, + PrimType_Inner_ThirdPartyModel = 10013, + PrimType_Inner_CustomMaskedFill = 10014, + PrimType_Inner_CustomTensorScatterMax = 10015, + PrimType_Inner_CustomIsInf = 10016, PrimType_InnerOpMax, PrimType_InnerOpMin = PrimType_Inner_ToFormat }; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/scatter_nd_binary_simd.h b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/scatter_nd_binary_simd.h new file mode 100644 index 00000000..dd9878f7 --- /dev/null +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/scatter_nd_binary_simd.h @@ -0,0 +1,36 @@ +/** +* Copyright 2023 Huawei Technologies Co., Ltd +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ +#ifndef NNACL_SCATTER_ND_BINARY_SIMD_H_ +#define NNACL_SCATTER_ND_BINARY_SIMD_H_ + +#include "nnacl/intrinsics/ms_simd_instructions.h" +#ifdef ENABLE_AVX512 +#include "nnacl/avx512/scatter_nd_binary_avx512.h" +#endif + +#ifdef ENABLE_AVX +#include "nnacl/avx/scatter_nd_binary_avx.h" +#endif + +#ifdef ENABLE_SSE +#include "nnacl/sse/scatter_nd_binary_sse.h" +#endif + +#ifdef ENABLE_ARM +#include "nnacl/neon/scatter_nd_binary_neon.h" +#endif + +#endif diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/sse/scatter_nd_binary_sse.h b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/sse/scatter_nd_binary_sse.h new file mode 100644 index 00000000..983d2923 --- /dev/null +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/sse/scatter_nd_binary_sse.h @@ -0,0 +1,66 @@ +/** +* Copyright 2023 Huawei Technologies Co., Ltd +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*/ +#ifndef NNACL_BASE_SCATTER_ND_BINARY_SSE_H_ +#define NNACL_BASE_SCATTER_ND_BINARY_SSE_H_ + +#include "nnacl/intrinsics/ms_simd_instructions.h" +#include "nnacl/intrinsics/ms_simd_sse_instructions.h" + +#ifdef __cplusplus +extern "C" { +#endif +#pragma GCC push_options +#pragma GCC target("sse4.1") +#define MS_SIMD_INSTRUCTION MS_SIMD_SSE_INSTRUCTION +#define BLOCK_NUM 4 +#define MS_SIMD_SSE + +static inline int ScatterNDAddFp32SSE(int index, const float *update, int size, float *output) { + for (int block_max_size = size - BLOCK_NUM + 1; index < block_max_size; index += BLOCK_NUM) { + SIMD_ST_F32(output + index, SIMD_ADD_F32(SIMD_LD_F32(output + index), SIMD_LD_F32(update + index))); + } + return index; +} + +static inline int ScatterNDAddInt32SSE(int index, const int *update, int size, int *output) { + for (int block_max_size = size - BLOCK_NUM + 1; index < block_max_size; index += BLOCK_NUM) { + SIMD_ST_EPI32(output + index, SIMD_ADD_EPI32(SIMD_LD_EPI32(output + index), SIMD_LD_EPI32(update + index))); + } + return index; +} + +static inline int ScatterNDMaxFp32SSE(int index, const float *update, int size, float *output) { + for (int block_max_size = size - BLOCK_NUM + 1; index < block_max_size; index += BLOCK_NUM) { + SIMD_ST_F32(output + index, SIMD_MAX_F32(SIMD_LD_F32(output + index), SIMD_LD_F32(update + index))); + } + return index; +} + +static inline int ScatterNDMaxInt32SSE(int index, const int *update, int size, int *output) { + for (int block_max_size = size - BLOCK_NUM + 1; index < block_max_size; index += BLOCK_NUM) { + SIMD_ST_EPI32(output + index, SIMD_MAX_EPI32(SIMD_LD_EPI32(output + index), SIMD_LD_EPI32(update + index))); + } + return index; +} + +#undef MS_SIMD_INSTRUCTION +#undef BLOCK_NUM +#pragma GCC pop_options +#undef MS_SIMD_SSE +#ifdef __cplusplus +} +#endif +#endif // NNACL_BASE_SCATTER_ND_BINARY_SSE_H_ diff --git a/mindspore/core/mindrt/BUILD.gn b/mindspore/core/mindrt/BUILD.gn index b56d5f5c..b0e7c70d 100644 --- a/mindspore/core/mindrt/BUILD.gn +++ b/mindspore/core/mindrt/BUILD.gn @@ -41,8 +41,15 @@ ohos_source_set("mindrt_obj") { "../../core/", ] + defines = [ + "ENABLE_MINDRT", + "MS_COMPILE_OHOS", + "BUILD_LITE", + ] + + external_deps = [ "hilog:libhilog" ] + remove_configs = [ "//build/config/compiler:no_rtti" ] - defines = [ "BUILD_LITE" ] part_name = "mindspore" subsystem_name = "thirdparty" diff --git a/mindspore/core/mindrt/src/thread/actor_threadpool.cc b/mindspore/core/mindrt/src/thread/actor_threadpool.cc index 70414757..c50c46e0 100644 --- a/mindspore/core/mindrt/src/thread/actor_threadpool.cc +++ b/mindspore/core/mindrt/src/thread/actor_threadpool.cc @@ -32,7 +32,7 @@ void ActorWorker::RunWithSpin() { } #if !defined(__APPLE__) && !defined(_MSC_VER) static std::atomic_int index{0}; - (void)pthread_setname_np(pthread_self(), ("ActorThread_" + std::to_string(index++)).c_str()); + (void)pthread_setname_np(pthread_self(), ("OS_Actor_" + std::to_string(index++)).c_str()); #endif #ifdef PLATFORM_86 // Some CPU kernels need set the flush zero mode to improve performance. 
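For context (illustration only, not part of this patch): each ScatterNDAdd*/ScatterNDMax* helper in the scatter_nd_binary_* headers above walks the buffer in BLOCK_NUM-wide steps and returns the index of the first element it did not process, so the caller is expected to finish with a scalar tail loop. A minimal sketch under that assumption; the standalone helper ScatterNDAddFp32Example and the direct call to the SSE variant are hypothetical, since the real dispatch is produced from the generated scatter_nd_binary_simd headers:

    #include "nnacl/scatter_nd_binary_simd.h"

    /* Hypothetical caller: SIMD body first, scalar tail for the remainder. */
    static void ScatterNDAddFp32Example(const float *update, int size, float *output) {
      int index = 0;
    #ifdef ENABLE_SSE
      /* SIMD body: advances 'index' in BLOCK_NUM-wide steps and stops before the tail. */
      index = ScatterNDAddFp32SSE(index, update, size, output);
    #endif
      /* Scalar tail covers the remaining (size - index) elements. */
      for (; index < size; index++) {
        output[index] += update[index];
      }
    }
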
diff --git a/mindspore/core/mindrt/src/thread/core_affinity.cc b/mindspore/core/mindrt/src/thread/core_affinity.cc index 33bf3529..a3478dff 100644 --- a/mindspore/core/mindrt/src/thread/core_affinity.cc +++ b/mindspore/core/mindrt/src/thread/core_affinity.cc @@ -344,12 +344,12 @@ int CoreAffinity::InitBindCoreId(size_t thread_num, BindMode bind_mode) { int CoreAffinity::SetAffinity() { return THREAD_OK; } #elif defined(BIND_CORE) int CoreAffinity::SetAffinity(const pthread_t &thread_id, cpu_set_t *cpu_set) { -#ifdef __ANDROID__ -#if __ANDROID_API__ >= 21 +#if defined(__ANDROID__) || defined(MS_COMPILE_OHOS) +#if (__ANDROID_API__ >= 21) || defined(MS_COMPILE_OHOS) THREAD_INFO("thread: %d, mask: %lu", pthread_gettid_np(thread_id), cpu_set->__bits[0]); int ret = sched_setaffinity(pthread_gettid_np(thread_id), sizeof(cpu_set_t), cpu_set); if (ret != THREAD_OK) { - THREAD_ERROR("bind thread %d to cpu failed. ERROR %d", pthread_gettid_np(thread_id), ret); + THREAD_ERROR("bind thread %d to cpu failed. ERROR %{public}d", pthread_gettid_np(thread_id), ret); return THREAD_ERROR; } #endif diff --git a/mindspore/core/mindrt/src/thread/core_affinity.h b/mindspore/core/mindrt/src/thread/core_affinity.h index 2dd2abd1..28b0967a 100644 --- a/mindspore/core/mindrt/src/thread/core_affinity.h +++ b/mindspore/core/mindrt/src/thread/core_affinity.h @@ -23,7 +23,7 @@ #ifdef PARALLEL_INFERENCE #define BIND_CORE #endif -#ifdef __ANDROID__ +#if defined(__ANDROID__) || defined(MS_COMPILE_OHOS) #define BIND_CORE #include #endif diff --git a/mindspore/core/mindrt/src/thread/parallel_threadpool.cc b/mindspore/core/mindrt/src/thread/parallel_threadpool.cc index 9e0dd25c..09c39f32 100644 --- a/mindspore/core/mindrt/src/thread/parallel_threadpool.cc +++ b/mindspore/core/mindrt/src/thread/parallel_threadpool.cc @@ -48,7 +48,7 @@ void ParallelWorker::ParallelRun() { SetAffinity(); } #if !defined(__APPLE__) && !defined(_MSC_VER) - (void)pthread_setname_np(pthread_self(), ("ParallelThread_" + std::to_string(worker_id_)).c_str()); + (void)pthread_setname_np(pthread_self(), ("OS_Parallel_" + std::to_string(worker_id_)).c_str()); #endif #ifdef PLATFORM_86 // Some CPU kernels need set the flush zero mode to improve performance. diff --git a/mindspore/core/mindrt/src/thread/threadlog.h b/mindspore/core/mindrt/src/thread/threadlog.h index 7ed917f1..b212a401 100644 --- a/mindspore/core/mindrt/src/thread/threadlog.h +++ b/mindspore/core/mindrt/src/thread/threadlog.h @@ -16,7 +16,9 @@ #ifndef MINDSPORE_CORE_MINDRT_RUNTIME_THREADPOOL_LOG_H_ #define MINDSPORE_CORE_MINDRT_RUNTIME_THREADPOOL_LOG_H_ - +#ifdef MS_COMPILE_OHOS +#include "hilog/log.h" +#endif namespace mindspore { #ifdef THREAD_POOL_DEBUG #include @@ -32,13 +34,35 @@ namespace mindspore { } #else #define THREAD_DEBUG(content, ...) -#define THREAD_INFO(content, ...) #define THREAD_TEST_TRUE(flag) + #if defined(__ANDROID__) +#define THREAD_INFO(content, ...) #include #define THREAD_ERROR(content, args...) \ { __android_log_print(ANDROID_LOG_ERROR, "MS_LITE", "%s|%d: " #content "\r\n", __func__, __LINE__, ##args); } + +#elif defined(MS_COMPILE_OHOS) // For OHOS, use hilog. + +#define MINDRT_OHOS_LOG_DOMAIN 0x2102 +#define MINDRT_OHOS_LOG_TAG "MS_LITE" + +#ifdef MS_COMPILE_WITH_OHOS_NDK +// When build with OHOS NDK, use public api of hilog module. +#define THREAD_INFO(content, args...) \ + { OH_LOG_Print(LOG_APP, LOG_INFO, MINDRT_OHOS_LOG_DOMAIN, MINDRT_OHOS_LOG_TAG, "%s:%d " #content, __func__, __LINE__, ##args); } +#define THREAD_ERROR(content, args...) 
\ + { OH_LOG_Print(LOG_APP, LOG_ERROR, MINDRT_OHOS_LOG_DOMAIN, MINDRT_OHOS_LOG_TAG, "%s:%d " #content, __func__, __LINE__, ##args); } +#else +// When build in OHOS repo, use inner api of hilog module. +#define THREAD_INFO(content, args...) \ + { HiLogPrint(LOG_APP, LOG_INFO, MINDRT_OHOS_LOG_DOMAIN, MINDRT_OHOS_LOG_TAG, "%s:%d " #content, __func__, __LINE__, ##args); } +#define THREAD_ERROR(content, args...) \ + { HiLogPrint(LOG_APP, LOG_ERROR, MINDRT_OHOS_LOG_DOMAIN, MINDRT_OHOS_LOG_TAG, "%s:%d " #content, __func__, __LINE__, ##args); } +#endif + #else +#define THREAD_INFO(content, ...) #define THREAD_ERROR(content, ...) #endif #endif diff --git a/mindspore/core/mindrt/src/thread/threadpool.cc b/mindspore/core/mindrt/src/thread/threadpool.cc index c56e0425..2301be8c 100644 --- a/mindspore/core/mindrt/src/thread/threadpool.cc +++ b/mindspore/core/mindrt/src/thread/threadpool.cc @@ -68,10 +68,11 @@ void Worker::SetAffinity() { #ifdef _WIN32 SetWindowsSelfAffinity(core_id_); #elif defined(BIND_CORE) -#ifdef __ANDROID__ +#if defined(__ANDROID__) || defined(MS_COMPILE_OHOS) + THREAD_INFO("thread: %d, mask: %lu", gettid(), mask_.__bits[0]); int ret = sched_setaffinity(gettid(), sizeof(cpu_set_t), &mask_); if (ret != THREAD_OK) { - THREAD_ERROR("bind thread %d to cpu failed. ERROR %d", gettid(), errno); + THREAD_ERROR("bind thread %d to cpu failed. ERROR %{public}d", gettid(), errno); } return; #else @@ -111,7 +112,7 @@ void Worker::Run() { } #if !defined(__APPLE__) && !defined(_MSC_VER) static std::atomic_int index = {0}; - (void)pthread_setname_np(pthread_self(), ("KernelThread_" + std::to_string(index++)).c_str()); + (void)pthread_setname_np(pthread_self(), ("OS_Kernel_" + std::to_string(index++)).c_str()); #endif #ifdef PLATFORM_86 // Some CPU kernels need set the flush zero mode to improve performance. 
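For illustration only (not part of this patch): under the MS_COMPILE_WITH_OHOS_NDK build, the THREAD_INFO/THREAD_ERROR macros above boil down to OH_LOG_Print calls against the hilog NDK API, using the domain 0x2102 and tag "MS_LITE" defined in threadlog.h. A minimal hand-written equivalent of the error path; the helper name and the tid/err parameters are placeholders:

    #include "hilog/log.h"

    /* Hypothetical helper: roughly what the THREAD_ERROR call in Worker::SetAffinity amounts to. */
    static void LogBindFailureExample(int tid, int err) {
      OH_LOG_Print(LOG_APP, LOG_ERROR, 0x2102, "MS_LITE",
                   "%s:%d bind thread %d to cpu failed. ERROR %{public}d", __func__, __LINE__, tid, err);
    }

The patch keeps these as macros rather than functions so that __func__ and __LINE__ resolve to the call site instead of a wrapper.
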
diff --git a/mindspore/lite/BUILD.gn b/mindspore/lite/BUILD.gn index a774b58c..f7e465e2 100644 --- a/mindspore/lite/BUILD.gn +++ b/mindspore/lite/BUILD.gn @@ -71,9 +71,14 @@ import("//build/ohos.gni") +declare_args() { + mindspore_feature_nnrt_metagraph = false +} + ohos_group("mindspore") { deps = [ ":mindspore_lib", + ":mindspore_ndk", ":mindspore_train_lib", "mindir:mindir_lib", "src/litert/js_api:mindsporelite_napi" @@ -180,7 +185,6 @@ lite_mindrt_sources = [ ] all_lite_sources += cxx_api_sources -all_lite_sources += c_api_sources all_lite_sources += api_source all_lite_sources += control_flow_kernel_sources all_lite_sources += experimental_sources @@ -368,7 +372,6 @@ ohos_shared_library("mindspore_lib") { sources = all_sources include_dirs = [ - "//base/hiviewdfx/hilog/interfaces/native/innerkits/include", "//third_party/flatbuffers/include", "./", "../", @@ -384,6 +387,7 @@ ohos_shared_library("mindspore_lib") { "../ccsrc/", "src/litert/kernel/cpu/", "../core/mindrt/src/", + "//foundation/ai/neural_network_runtime/", ] defines = [ @@ -426,24 +430,29 @@ ohos_shared_library("mindspore_lib") { external_deps = [ "hilog:libhilog" ] - output_name = "libmindspore-lite.huawei" + output_name = "libmindspore-lite" output_extension = "so" innerapi_tags = [ "platformsdk" ] SUPPORT_NNRT = true if (SUPPORT_NNRT) { + if (mindspore_feature_nnrt_metagraph) { + defines += [ "SUPPORT_NNRT_METAGRAPH" ] + print("enabled feature: mindspore_feature_nnrt_metagraph") + } sources += [ "src/litert/delegate/nnrt/checker/primitive_check.cc", "src/litert/delegate/nnrt/nnrt_delegate.cc", "src/litert/delegate/nnrt/nnrt_model_kernel.cc", ] include_dirs += [ - "//foundation/ai/neural_network_runtime", "src/delegate/nnrt/include", "../../mindspore/core/ir", "mindir/include", "mindir/inner_headers", ] + external_deps += [ "neural_network_runtime:nnrt_target" ] + deps += [ "mindir:mindir_lib" ] defines += [ "SUPPORT_NNRT" ] } @@ -461,6 +470,67 @@ ohos_shared_library("mindspore_lib") { subsystem_name = "thirdparty" } +# NDK lib +ohos_shared_library("mindspore_ndk") { + deps = [ + ":mindspore_lib", + ":mindspore_train_lib" + ] + + sources = c_api_sources + + include_dirs = [ + "//base/hiviewdfx/hilog/interfaces/native/innerkits/include", + "//third_party/flatbuffers/include", + "./", + "../", + "../../", + "../core", + "src", + "src/c_api/", + "../ccsrc/plugin/device/cpu/kernel/", + "../core/mindrt/src/", + "../core/mindrt/include/", + "../../third_party/", + "./schema/", + "../ccsrc/", + "//foundation/ai/neural_network_runtime/", + ] + + defines = [ + "SUPPORT_NNRT", + "MS_COMPILE_OHOS", + "PRIMITIVE_WRITEABLE", + "RUNTIME_PASS_CLIP", + "ENABLE_MULTI_LAYOUT", + "VERSION_STR=\"2.1.0\"", + ] + + configs = [ + ":mindspore_api", + ":disable_android", + ":secure_option", + ] + + external_deps = [ "neural_network_runtime:nnrt_target" ] + + remove_configs = [ "//build/config/compiler:no_rtti" ] + + output_name = "libmindspore_lite_ndk" + output_extension = "so" + innerapi_tags = [ "ndk"] + cflags_cc = [ + "-Wno-ignored-qualifiers", + "-Wunused-private-field", + "-Wno-unused-private-field", + "-Wno-inconsistent-missing-override", + "-Wno-macro-redefined", + "-Wno-constant-conversion", + ] + part_name = "mindspore" + subsystem_name = "thirdparty" +} + # Train library expression_cxx_api_sources = [ "src/litert/cxx_api/expression/net.cc", @@ -614,7 +684,6 @@ ohos_shared_library("mindspore_train_lib") { sources = all_train_sources include_dirs = [ - "//base/hiviewdfx/hilog/interfaces/native/innerkits/include", 
"//third_party/flatbuffers/include", "./", "../", @@ -698,6 +767,9 @@ config("disable_android") { "-U__ANDROID__", "-U__ANDROID_API__", ] + ldflags = [ + "-Wl,--no-as-needed", + ] } config("secure_option") { diff --git a/mindspore/lite/CMakeLists.txt b/mindspore/lite/CMakeLists.txt index 72337f70..1faf2f38 100644 --- a/mindspore/lite/CMakeLists.txt +++ b/mindspore/lite/CMakeLists.txt @@ -298,8 +298,9 @@ elseif(TOOLCHAIN_NAME STREQUAL "ohos-lite") elseif(TOOLCHAIN_NAME STREQUAL "ohos") set(TARGET_OHOS on) add_compile_definitions(MS_COMPILE_OHOS) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-unused-command-line-argument -Wno-c++17-extensions") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-command-line-argument -Wno-c++17-extensions") + add_compile_definitions(MS_COMPILE_WITH_OHOS_NDK) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-unused-command-line-argument -Wno-c++17-extensions -Wno-deprecated-builtins") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-command-line-argument -Wno-c++17-extensions -Wno-deprecated-builtins") endif() if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7.3.0 diff --git a/mindspore/lite/include/lite_types.h b/mindspore/lite/include/lite_types.h index 017e98a8..860390d5 100644 --- a/mindspore/lite/include/lite_types.h +++ b/mindspore/lite/include/lite_types.h @@ -42,6 +42,7 @@ typedef enum { DT_NPU, /**< NPU device type */ DT_ASCEND, /**< ASCEND device type */ DT_CUSTOM, /**< EXTEND device type */ + DT_NNRT, /**< NNRT device type */ DT_END /**< NO device type */ } DeviceType; diff --git a/mindspore/lite/include/model.h b/mindspore/lite/include/model.h index 93e27ea9..b96c7e35 100644 --- a/mindspore/lite/include/model.h +++ b/mindspore/lite/include/model.h @@ -25,6 +25,7 @@ namespace mindspore { namespace schema { struct Tensor; } // namespace schema + namespace lite { typedef enum { ModelType_MSLite, ModelType_MindIR } LiteModelType; @@ -62,7 +63,10 @@ struct MS_API LiteGraph { bool model_obfuscated_ = false; std::vector deobf_prims_; #endif + + std::string ToString() const; }; + struct MS_API Model { LiteGraph graph_; char *buf = nullptr; diff --git a/mindspore/lite/include/registry/converter_context.h b/mindspore/lite/include/registry/converter_context.h index 2d72b200..4bc92599 100644 --- a/mindspore/lite/include/registry/converter_context.h +++ b/mindspore/lite/include/registry/converter_context.h @@ -39,7 +39,9 @@ enum MS_API FmkType : int { kFmkTypeMs = 3, kFmkTypeTflite = 4, kFmkTypePytorch = 5, - kFmkTypeMsLite = 6, + kFmkTypeThirdParty = 6, + kFmkTypeMsLite = 7, + kFmkTypeEnd = 8, // For range check purpose, valid range: [0, kFmkTypeEnd) }; /// \brief ConverterParameters defined read-only converter parameters used by users in ModelParser. 
diff --git a/mindspore/lite/mindir/include/mindir.h b/mindspore/lite/mindir/include/mindir.h index ca811dce..f47cad8c 100644 --- a/mindspore/lite/mindir/include/mindir.h +++ b/mindspore/lite/mindir/include/mindir.h @@ -151,6 +151,8 @@ int64_t MindIR_Conv2DFusion_GetOutChannel(ConstPrimitivePtr primitive); void MindIR_Conv2DFusion_SetOutChannel(PrimitivePtr *primitive, int64_t out_channel); ActivationType MindIR_Conv2DFusion_GetActivationType(ConstPrimitivePtr primitive); void MindIR_Conv2DFusion_SetActivationType(PrimitivePtr *primitive, ActivationType activation_type); +Format MindIR_Conv2DFusion_GetFormat(ConstPrimitivePtr primitive); +void MindIR_Conv2DFusion_SetFormat(PrimitivePtr *primitive, Format format); // ********** Conv2dTransposeFusion ********** PrimitivePtr MindIR_Conv2dTransposeFusion_CreatePrimitive( diff --git a/mindspore/lite/mindir/src/mindir.cc b/mindspore/lite/mindir/src/mindir.cc index 7fc9c00e..374bbef5 100644 --- a/mindspore/lite/mindir/src/mindir.cc +++ b/mindspore/lite/mindir/src/mindir.cc @@ -1215,6 +1215,46 @@ void MindIR_Conv2DFusion_SetActivationType(PrimitivePtr *primitive, ActivationTy } } +Format MindIR_Conv2DFusion_GetFormat(ConstPrimitivePtr primitive) { + if (primitive != nullptr) { + auto prim = static_cast(primitive); + auto value = prim->value_as_Conv2DFusion(); + if (prim != nullptr && value != nullptr) { + return static_cast(value->format()); + } else { + Format en = static_cast(0); + return en; + } + } else { + Format en = static_cast(0); + return en; + } +} + +void MindIR_Conv2DFusion_SetFormat(PrimitivePtr *primitive, Format format) { + if (primitive != nullptr && *primitive != nullptr) { + auto prim = static_cast(*primitive); + auto value = prim->value_as_Conv2DFusion(); + if (prim != nullptr && value != nullptr) { + flatbuffers::FlatBufferBuilder fbb; + auto ops_offset = schema::CreateConv2DFusion( + fbb, static_cast(format), + fbb.CreateVector(value->kernel_size()->data(), value->kernel_size()->size()), + fbb.CreateVector(value->stride()->data(), value->stride()->size()), + fbb.CreateVector(value->dilation()->data(), value->dilation()->size()), + static_cast(value->pad_mode()), + fbb.CreateVector(value->pad_list()->data(), value->pad_list()->size()), 0, value->group(), value->in_channel(), + value->out_channel(), static_cast(value->activation_type())); + auto prim_offset = + schema::CreatePrimitive(fbb, static_cast(NODE_TYPE_CONV2D_FUSION), ops_offset.o); + fbb.Finish(prim_offset); + auto new_addr = MindIRMemoryManager::GetInstance()->CreatePrimitiveFromBuilder(fbb, prim); + auto ret_value = flatbuffers::GetMutableRoot(new_addr); + *primitive = ret_value; + } + } +} + // ********** Conv2dTransposeFusion ********** PrimitivePtr MindIR_Conv2dTransposeFusion_CreatePrimitive( const std::vector &kernel_size, const std::vector &stride, const std::vector &dilation, diff --git a/mindspore/lite/mindir/src/mindir_tensor.cc b/mindspore/lite/mindir/src/mindir_tensor.cc index 9ec2d0e4..2db4ce8b 100644 --- a/mindspore/lite/mindir/src/mindir_tensor.cc +++ b/mindspore/lite/mindir/src/mindir_tensor.cc @@ -134,7 +134,7 @@ void MindIR_Tensor_SetDataType(TensorPtr *tensor, DataType data_type) { name = fbb.CreateString(value->name()->c_str(), value->name()->size()); } auto ops_offset = - schema::CreateTensor(fbb, 0, value->dataType(), dims, static_cast(value->format()), 0, 0, data, + schema::CreateTensor(fbb, 0, data_type, dims, static_cast(value->format()), 0, 0, data, ConvertQuantParams(fbb, value->quantParams()), 0, name); fbb.Finish(ops_offset); auto new_addr 
= MindIRMemoryManager::GetInstance()->CreateTensorFromBuilder(fbb, value); diff --git a/mindspore/lite/mindir/src/utils.cc b/mindspore/lite/mindir/src/utils.cc index 28d66ceb..b044f414 100644 --- a/mindspore/lite/mindir/src/utils.cc +++ b/mindspore/lite/mindir/src/utils.cc @@ -22,7 +22,7 @@ namespace lite { // ********** PrimitiveBase ********** NodeType MindIR_Primitive_GetType(PrimitivePtr primitive) { - auto prim = flatbuffers::GetMutableRoot(primitive); + auto prim = static_cast(primitive); auto type = prim->value_type(); return static_cast(type); } diff --git a/mindspore/lite/src/CMakeLists.txt b/mindspore/lite/src/CMakeLists.txt index 5afccc87..de1781cd 100644 --- a/mindspore/lite/src/CMakeLists.txt +++ b/mindspore/lite/src/CMakeLists.txt @@ -410,6 +410,11 @@ add_subdirectory(common) add_library(lite_src_mid OBJECT ${LITE_SRC}) add_dependencies(lite_src_mid lite_src_common_mid fbs_src fbs_inner_src) +if(SUPPORT_NNRT) + add_subdirectory(litert/delegate/nnrt) + target_link_libraries(lite_src_mid nnrt_mid) +endif() + if(MSLITE_ENABLE_ACL) include_directories(${TOP_DIR}/graphengine/910/inc/external) if(NOT (MSLITE_ENABLE_CLOUD_FUSION_INFERENCE OR MSLITE_ENABLE_CLOUD_INFERENCE)) @@ -497,7 +502,6 @@ if(MSLITE_ENABLE_MINDRT) endif() if (SUPPORT_NNRT) - add_subdirectory(litert/delegate/nnrt) target_link_libraries(mindspore-lite nnrt_mid) target_link_libraries(mindspore-lite_static nnrt_mid) endif() diff --git a/mindspore/lite/src/common/context_util.cc b/mindspore/lite/src/common/context_util.cc index f011e0d7..0fa4ebd0 100644 --- a/mindspore/lite/src/common/context_util.cc +++ b/mindspore/lite/src/common/context_util.cc @@ -118,6 +118,17 @@ std::shared_ptr CustomDeviceInfoFromCustomDeviceCo MS_CHECK_TRUE_RET(device_info != nullptr, nullptr); return device_info; } + +std::shared_ptr NNRtDeviceInfoFromNNRtDeviceContext( + const lite::DeviceContext &nnrt_context) { + if (nnrt_context.device_type_ != DT_NNRT) { + MS_LOG(ERROR) << "Function input parameter is not NNRt context."; + return nullptr; + } + auto nnrt_info = std::make_shared(); + MS_CHECK_TRUE_RET(nnrt_info != nullptr, nullptr); + return nnrt_info; +} } // namespace mindspore::Context *MSContextFromContext(const std::shared_ptr &context) { @@ -144,7 +155,8 @@ mindspore::Context *MSContextFromContext(const std::shared_ptr &co {DT_GPU, GPUDeviceInfoFromGPUDeviceContext}, {DT_NPU, NPUDeviceInfoFromNPUDeviceContext}, {DT_ASCEND, AscendDeviceInfoFromAscendDeviceContext}, - {DT_CUSTOM, CustomDeviceInfoFromCustomDeviceContext}}; + {DT_CUSTOM, CustomDeviceInfoFromCustomDeviceContext}, + {DT_NNRT, NNRtDeviceInfoFromNNRtDeviceContext}}; for (auto &device_context : context->device_list_) { auto device_type = device_context.device_type_; if (transfer_funcs.find(device_type) == transfer_funcs.end()) { diff --git a/mindspore/lite/src/common/log.cc b/mindspore/lite/src/common/log.cc index 66c0d76b..f1040662 100644 --- a/mindspore/lite/src/common/log.cc +++ b/mindspore/lite/src/common/log.cc @@ -21,6 +21,13 @@ #include #endif +#ifdef MS_COMPILE_OHOS +#define LOG_DOMAIN 0xD002102 +#define LOG_TAG "MS_LITE" +#define FORMAT "[%{public}s:%{public}d] %{public}s# %{public}s" +#include "hilog/log.h" +#endif + // namespace to support utils module definition namespace mindspore constexpr const char *ANDROID_LOG_TAG = "MS_LITE"; namespace mindspore { #if defined(__ANDROID__) @@ -73,17 +80,33 @@ static int GetAndroidLogLevel(LiteLogLevel level) { #ifdef MS_COMPILE_OHOS void PrintHiLog(LiteLogLevel level, const char *file, int line, const char *func, const char 
*msg) { +#ifdef MS_COMPILE_WITH_OHOS_NDK + // When build with OHOS NDK, use public api of hilog module. if (level == LiteLogLevel::DEBUG) { - OHOS::HiviewDFX::HiLog::Debug(MSLite_LABEL, FORMAT, file, line, func, msg); + OH_LOG_Print(LOG_APP, LOG_DEBUG, LOG_DOMAIN, LOG_TAG, FORMAT, file, line, func, msg); } else if (level == LiteLogLevel::INFO) { - OHOS::HiviewDFX::HiLog::Info(MSLite_LABEL, FORMAT, file, line, func, msg); + OH_LOG_Print(LOG_APP, LOG_INFO, LOG_DOMAIN, LOG_TAG, FORMAT, file, line, func, msg); } else if (level == LiteLogLevel::WARNING) { - OHOS::HiviewDFX::HiLog::Warn(MSLite_LABEL, FORMAT, file, line, func, msg); + OH_LOG_Print(LOG_APP, LOG_WARN, LOG_DOMAIN, LOG_TAG, FORMAT, file, line, func, msg); } else if (level == LiteLogLevel::ERROR) { - OHOS::HiviewDFX::HiLog::Error(MSLite_LABEL, FORMAT, file, line, func, msg); + OH_LOG_Print(LOG_APP, LOG_ERROR, LOG_DOMAIN, LOG_TAG, FORMAT, file, line, func, msg); } else { - OHOS::HiviewDFX::HiLog::Error(MSLite_LABEL, FORMAT, file, line, func, msg); + OH_LOG_Print(LOG_APP, LOG_ERROR, LOG_DOMAIN, LOG_TAG, FORMAT, file, line, func, msg); } +#else + // When build in OHOS repo, use inner api of hilog module. + if (level == LiteLogLevel::DEBUG) { + HILOG_DEBUG(LOG_APP, FORMAT, file, line, func, msg); + } else if (level == LiteLogLevel::INFO) { + HILOG_INFO(LOG_APP, FORMAT, file, line, func, msg); + } else if (level == LiteLogLevel::WARNING) { + HILOG_WARN(LOG_APP, FORMAT, file, line, func, msg); + } else if (level == LiteLogLevel::ERROR) { + HILOG_ERROR(LOG_APP, FORMAT, file, line, func, msg); + } else { + HILOG_ERROR(LOG_APP, FORMAT, file, line, func, msg); + } +#endif } #endif diff --git a/mindspore/lite/src/common/log.h b/mindspore/lite/src/common/log.h index 3002a454..bea21f01 100644 --- a/mindspore/lite/src/common/log.h +++ b/mindspore/lite/src/common/log.h @@ -23,12 +23,6 @@ #include #include "utils/overload.h" -#ifdef MS_COMPILE_OHOS -#define LOG_DOMAIN 0x2102 -#define LOG_TAG "MS_Lite" -#define FORMAT "[%{public}s:%{public}d] %{public}s# %{public}s" -#include "hilog/log.h" -#endif // NOTICE: when relative path of 'log.h' changed, macro 'LITE_LOG_HEAR_FILE_REL_PATH' must be changed #ifndef LITE_LOG_HEAR_FILE_REL_PATH #define LITE_LOG_HEAR_FILE_REL_PATH "mindspore/lite/src/common/log.h" @@ -140,6 +134,9 @@ class LiteLogWriter { LiteLogLevel log_level_; }; +#define MSLOG_IF(level) \ + mindspore::LiteLogWriter(mindspore::LiteLocationInfo(LITE_FILE_NAME, __LINE__, __FUNCTION__), level) < \ + mindspore::LiteLogStream() #define MS_LOG(level) MS_LOG_##level @@ -148,47 +145,6 @@ class LiteLogWriter { #define MS_LOG_WARNING MSLOG_IF(mindspore::LiteLogLevel::WARNING) #define MS_LOG_ERROR MSLOG_IF(mindspore::LiteLogLevel::ERROR) - -#ifdef MS_COMPILE_OHOS -namespace { -constexpr unsigned int MSLITE_DOMAIN_ID_START = 0xD0029A0; -constexpr unsigned int MSLITE_DOMAIN_ID_END = MSLITE_DOMAIN_ID_START + 32; -constexpr unsigned int TEST_DOMAIN_ID = 0xD000F00; -} // namespace - -#define FILE_NAME (__builtin_strrchr(__FILE__, '/') ? 
__builtin_strrchr(__FILE__, '/') + 1 : __FILE__) -#define FORMAT "[%{public}s:%{public}d] %{public}s# %{public}s" - -#define MSLOG_IF(level) \ - mindspore::LiteLogWriter(mindspore::LiteLocationInfo(LITE_FILE_NAME, __LINE__, __FUNCTION__), level) < \ - mindspore::LiteLogStream() - -enum MSLiteManagerLogLabel { - // Component labels, you can add if needed - COMP_FWK = 0, - // Test label - LABEL_TEST, - // The end of labels, max to the domain id range length 32 - LABEL_END, -}; - -enum MSLiteManagerLogDomain { - DOMAIN_FRAMEWORK = MSLITE_DOMAIN_ID_START + COMP_FWK, // 0xD0029A0 - DOMAIN_TEST = TEST_DOMAIN_ID, // 0xD000F00 - DOMAIN_END = MSLITE_DOMAIN_ID_END, // Max to 0xD002940, keep the sequence and length same as MSLiteManagerLogLabel -}; - -// Keep the sequence and length same as MSLiteManagerLogDomain -static constexpr OHOS::HiviewDFX::HiLogLabel MSLite_LABEL = {LOG_CORE, DOMAIN_FRAMEWORK, "MSLiteFwk"}; - -#else - -#define MSLOG_IF(level) \ - mindspore::LiteLogWriter(mindspore::LiteLocationInfo(LITE_FILE_NAME, __LINE__, __FUNCTION__), level) < \ - mindspore::LiteLogStream() - -#endif - } // namespace mindspore #ifdef Debug diff --git a/mindspore/lite/src/common/ops/populate/custom_populate.cc b/mindspore/lite/src/common/ops/populate/custom_populate.cc index 5e1878b9..13957ed7 100644 --- a/mindspore/lite/src/common/ops/populate/custom_populate.cc +++ b/mindspore/lite/src/common/ops/populate/custom_populate.cc @@ -19,6 +19,9 @@ #include "nnacl/custom_parameter.h" #include "nnacl/split_parameter.h" #include "nnacl/custom_gru_parameter.h" +#include "nnacl/custom_masked_fill_parameter.h" +#include "nnacl/custom_is_inf_parameter.h" +#include "nnacl/custom_tensor_scatter_max_parameter.h" using mindspore::schema::PrimitiveType_Custom; namespace mindspore { @@ -92,6 +95,39 @@ OpParameter *CreateCustomGruParameter() { return reinterpret_cast(param); } +OpParameter *CreateCustomIsInfParameter() { + auto *param = static_cast(malloc(sizeof(CustomIsInfParameter))); + if (param == nullptr) { + MS_LOG(ERROR) << "malloc CustomIsInfParameter failed."; + return nullptr; + } + memset(param, 0, sizeof(CustomIsInfParameter)); + param->op_parameter_.type_ = PrimType_Inner_CustomIsInf; + return reinterpret_cast(param); +} + +OpParameter *CreateCustomTensorScatterMaxParameter() { + auto *param = static_cast(malloc(sizeof(CustomTensorScatterMaxParameter))); + if (param == nullptr) { + MS_LOG(ERROR) << "malloc CustomTensorScatterMaxParameter failed."; + return nullptr; + } + memset(param, 0, sizeof(CustomTensorScatterMaxParameter)); + param->op_parameter_.type_ = PrimType_Inner_CustomTensorScatterMax; + return reinterpret_cast(param); +} + +OpParameter *CreateCustomMaskedFillParameter() { + auto *param = static_cast(malloc(sizeof(CustomMaskedFillParameter))); + if (param == nullptr) { + MS_LOG(ERROR) << "malloc CustomMaskedFillParameter failed."; + return nullptr; + } + memset(param, 0, sizeof(CustomMaskedFillParameter)); + param->op_parameter_.type_ = PrimType_Inner_CustomMaskedFill; + return reinterpret_cast(param); +} + OpParameter *PopulateCustomParameter(const void *prim) { MS_CHECK_TRUE_RET(prim != nullptr, nullptr); auto primitive = static_cast(prim); @@ -131,6 +167,23 @@ OpParameter *PopulateCustomParameter(const void *prim) { return CreateCustomGruParameter(); } else if (type == "CastGatherReduceFusion") { return CreateParam(PrimType_Inner_CastGatherReduceFusion); + } else if (type == "ThirdPartyModel") { + auto *param = static_cast(malloc(sizeof(CustomParameter))); + if (param == nullptr) { + 
MS_LOG(ERROR) << "malloc CustomParameter failed."; + return nullptr; + } + memset(param, 0, sizeof(CustomParameter)); + param->op_parameter_.type_ = PrimType_Inner_ThirdPartyModel; + // Just use the attr_data pointer to save the prim directly, the inner value is parsed as necessary. + param->attr_data[0] = static_cast(const_cast(prim)); + return reinterpret_cast(param); + } else if (type == "MaskedFill") { + return CreateCustomMaskedFillParameter(); + } else if (type == "TensorScatterMax") { + return CreateCustomTensorScatterMaxParameter(); + } else if (type == "IsInf") { + return CreateCustomIsInfParameter(); } else { MS_LOG(ERROR) << "Unsupported custom type: " << type; } diff --git a/mindspore/lite/src/litert/c_api/context_c.cc b/mindspore/lite/src/litert/c_api/context_c.cc index f614ef09..c5f825aa 100644 --- a/mindspore/lite/src/litert/c_api/context_c.cc +++ b/mindspore/lite/src/litert/c_api/context_c.cc @@ -14,12 +14,17 @@ * limitations under the License. */ #include "include/c_api/context_c.h" -#include "src/litert/c_api/context_c.h" +#include "include/api/context.h" +#include +#include "src/litert/c_api/type_c_private.h" #include "src/common/log_adapter.h" +#ifdef SUPPORT_NNRT +#include "interfaces/kits/c/neural_network_runtime/neural_network_runtime.h" +#endif // ================ Context ================ OH_AI_ContextHandle OH_AI_ContextCreate() { - auto impl = new (std::nothrow) mindspore::ContextC; + auto impl = new (std::nothrow) mindspore::Context(); if (impl == nullptr) { MS_LOG(ERROR) << "memory allocation failed."; return nullptr; @@ -29,7 +34,7 @@ OH_AI_ContextHandle OH_AI_ContextCreate() { void OH_AI_ContextDestroy(OH_AI_ContextHandle *context) { if (context != nullptr && *context != nullptr) { - auto impl = static_cast(*context); + auto impl = static_cast(*context); delete impl; *context = nullptr; } @@ -40,8 +45,8 @@ void OH_AI_ContextSetThreadNum(OH_AI_ContextHandle context, int32_t thread_num) MS_LOG(ERROR) << "param is nullptr."; return; } - auto impl = static_cast(context); - impl->thread_num = thread_num; + auto impl = static_cast(context); + impl->SetThreadNum(thread_num); } int32_t OH_AI_ContextGetThreadNum(const OH_AI_ContextHandle context) { @@ -49,8 +54,8 @@ int32_t OH_AI_ContextGetThreadNum(const OH_AI_ContextHandle context) { MS_LOG(ERROR) << "param is nullptr."; return 0; } - auto impl = static_cast(context); - return impl->thread_num; + auto impl = static_cast(context); + return impl->GetThreadNum(); } void OH_AI_ContextSetThreadAffinityMode(OH_AI_ContextHandle context, int mode) { @@ -58,8 +63,8 @@ void OH_AI_ContextSetThreadAffinityMode(OH_AI_ContextHandle context, int mode) { MS_LOG(ERROR) << "param is nullptr."; return; } - auto impl = static_cast(context); - impl->affinity_mode = mode; + auto impl = static_cast(context); + impl->SetThreadAffinity(mode); return; } @@ -68,8 +73,8 @@ int OH_AI_ContextGetThreadAffinityMode(const OH_AI_ContextHandle context) { MS_LOG(ERROR) << "param is nullptr."; return 0; } - auto impl = static_cast(context); - return impl->affinity_mode; + auto impl = static_cast(context); + return impl->GetThreadAffinityMode(); } void OH_AI_ContextSetThreadAffinityCoreList(OH_AI_ContextHandle context, const int32_t *core_list, size_t core_num) { @@ -78,8 +83,8 @@ void OH_AI_ContextSetThreadAffinityCoreList(OH_AI_ContextHandle context, const i return; } const std::vector vec_core_list(core_list, core_list + core_num); - auto impl = static_cast(context); - impl->affinity_core_list = vec_core_list; + auto impl = static_cast(context); + 
impl->SetThreadAffinity(vec_core_list); return; } @@ -88,9 +93,18 @@ const int32_t *OH_AI_ContextGetThreadAffinityCoreList(const OH_AI_ContextHandle MS_LOG(ERROR) << "param is nullptr."; return nullptr; } - auto impl = static_cast(context); - *core_num = impl->affinity_core_list.size(); - return impl->affinity_core_list.data(); + auto impl = static_cast(context); + auto affinity_core_list = impl->GetThreadAffinityCoreList(); + *core_num = affinity_core_list.size(); + int32_t *core_list = static_cast(malloc((*core_num) * sizeof(int32_t))); + if (core_list == nullptr) { + MS_LOG(ERROR) << "malloc core_list is null."; + return nullptr; + } + for (size_t i = 0; i < affinity_core_list.size(); i++) { + core_list[i] = affinity_core_list[i]; + } + return core_list; } void OH_AI_ContextSetEnableParallel(OH_AI_ContextHandle context, bool is_parallel) { @@ -98,8 +112,8 @@ void OH_AI_ContextSetEnableParallel(OH_AI_ContextHandle context, bool is_paralle MS_LOG(ERROR) << "param is nullptr."; return; } - auto impl = static_cast(context); - impl->enable_parallel = is_parallel; + auto impl = static_cast(context); + impl->SetEnableParallel(is_parallel); } bool OH_AI_ContextGetEnableParallel(const OH_AI_ContextHandle context) { @@ -107,8 +121,8 @@ bool OH_AI_ContextGetEnableParallel(const OH_AI_ContextHandle context) { MS_LOG(ERROR) << "param is nullptr."; return false; } - auto impl = static_cast(context); - return impl->enable_parallel; + auto impl = static_cast(context); + return impl->GetEnableParallel(); } void OH_AI_ContextAddDeviceInfo(OH_AI_ContextHandle context, OH_AI_DeviceInfoHandle device_info) { @@ -116,25 +130,36 @@ void OH_AI_ContextAddDeviceInfo(OH_AI_ContextHandle context, OH_AI_DeviceInfoHan MS_LOG(ERROR) << "param is nullptr."; return; } - auto impl = static_cast(context); - std::shared_ptr device(static_cast(device_info)); - impl->device_info_list.push_back(device); + auto impl = static_cast(context); + std::shared_ptr device(static_cast(device_info)); + impl->MutableDeviceInfo().push_back(device); } // ================ DeviceInfo ================ OH_AI_DeviceInfoHandle OH_AI_DeviceInfoCreate(OH_AI_DeviceType device_type) { - mindspore::DeviceInfoC *impl = new (std::nothrow) mindspore::DeviceInfoC; + mindspore::DeviceInfoContext *impl; + if (OH_AI_DEVICETYPE_CPU == device_type) { + impl = new (std::nothrow) mindspore::CPUDeviceInfo(); + } else if (OH_AI_DEVICETYPE_GPU == device_type) { + impl = new (std::nothrow) mindspore::GPUDeviceInfo(); + } else if (OH_AI_DEVICETYPE_KIRIN_NPU == device_type) { + impl = new (std::nothrow) mindspore::KirinNPUDeviceInfo(); + } else if (OH_AI_DEVICETYPE_NNRT == device_type) { + impl = new (std::nothrow) mindspore::NNRTDeviceInfo(); + } else { + MS_LOG(ERROR) << "device_type is invalid."; + impl = nullptr; + } if (impl == nullptr) { MS_LOG(ERROR) << "memory allocation failed."; return nullptr; } - impl->device_type = device_type; return static_cast(impl); } void OH_AI_DeviceInfoDestroy(OH_AI_DeviceInfoHandle *device_info) { if (device_info != nullptr && *device_info != nullptr) { - auto impl = static_cast(*device_info); + auto impl = static_cast(*device_info); delete impl; *device_info = nullptr; } @@ -145,8 +170,8 @@ void OH_AI_DeviceInfoSetProvider(OH_AI_DeviceInfoHandle device_info, const char MS_LOG(ERROR) << "param is nullptr."; return; } - auto impl = static_cast(device_info); - impl->provider = provider; + auto impl = static_cast(device_info); + impl->SetProvider(provider); } const char *OH_AI_DeviceInfoGetProvider(const OH_AI_DeviceInfoHandle 
device_info) { @@ -154,8 +179,14 @@ const char *OH_AI_DeviceInfoGetProvider(const OH_AI_DeviceInfoHandle device_info MS_LOG(ERROR) << "param is nullptr."; return nullptr; } - auto impl = static_cast(device_info); - return impl->provider.c_str(); + auto impl = static_cast(device_info); + char *provider = static_cast(malloc(impl->GetProvider().size() + 1)); + if (provider == nullptr) { + MS_LOG(ERROR) << "malloc provider is null."; + return nullptr; + } + strcpy(provider, impl->GetProvider().c_str()); + return provider; } void OH_AI_DeviceInfoSetProviderDevice(OH_AI_DeviceInfoHandle device_info, const char *device) { @@ -163,8 +194,8 @@ void OH_AI_DeviceInfoSetProviderDevice(OH_AI_DeviceInfoHandle device_info, const MS_LOG(ERROR) << "param is nullptr."; return; } - auto impl = static_cast(device_info); - impl->provider_device = device; + auto impl = static_cast(device_info); + impl->SetProviderDevice(device); } const char *OH_AI_DeviceInfoGetProviderDevice(const OH_AI_DeviceInfoHandle device_info) { @@ -172,8 +203,14 @@ const char *OH_AI_DeviceInfoGetProviderDevice(const OH_AI_DeviceInfoHandle devic MS_LOG(ERROR) << "param is nullptr."; return nullptr; } - auto impl = static_cast(device_info); - return impl->provider_device.c_str(); + auto impl = static_cast(device_info); + char *provider_device = static_cast(malloc(impl->GetProviderDevice().size() + 1)); + if (provider_device == nullptr) { + MS_LOG(ERROR) << "malloc provider_device is null."; + return nullptr; + } + strcpy(provider_device, impl->GetProviderDevice().c_str()); + return provider_device; } OH_AI_DeviceType OH_AI_DeviceInfoGetDeviceType(const OH_AI_DeviceInfoHandle device_info) { @@ -181,8 +218,8 @@ OH_AI_DeviceType OH_AI_DeviceInfoGetDeviceType(const OH_AI_DeviceInfoHandle devi MS_LOG(ERROR) << "param is nullptr."; return OH_AI_DEVICETYPE_INVALID; } - auto impl = static_cast(device_info); - return impl->device_type; + auto impl = static_cast(device_info); + return static_cast(impl->GetDeviceType()); } void OH_AI_DeviceInfoSetEnableFP16(OH_AI_DeviceInfoHandle device_info, bool is_fp16) { @@ -190,9 +227,17 @@ void OH_AI_DeviceInfoSetEnableFP16(OH_AI_DeviceInfoHandle device_info, bool is_f MS_LOG(ERROR) << "param is nullptr."; return; } - auto impl = static_cast(device_info); - if (impl->device_type == OH_AI_DEVICETYPE_CPU || impl->device_type == OH_AI_DEVICETYPE_GPU) { - impl->enable_fp16 = is_fp16; + + auto impl_device = static_cast(device_info); + if (OH_AI_DEVICETYPE_CPU == static_cast(impl_device->GetDeviceType())) { + auto impl = static_cast(device_info); + impl->SetEnableFP16(is_fp16); + } else if (OH_AI_DEVICETYPE_GPU == static_cast(impl_device->GetDeviceType())) { + auto impl = static_cast(device_info); + impl->SetEnableFP16(is_fp16); + } else if (OH_AI_DEVICETYPE_NNRT == static_cast(impl_device->GetDeviceType())) { + auto impl = static_cast(device_info); + impl->SetEnableFP16(is_fp16); } else { MS_LOG(ERROR) << "Unsupported Feature."; } @@ -203,11 +248,19 @@ bool OH_AI_DeviceInfoGetEnableFP16(const OH_AI_DeviceInfoHandle device_info) { MS_LOG(ERROR) << "param is nullptr."; return false; } - auto impl = static_cast(device_info); - if (impl->device_type == OH_AI_DEVICETYPE_CPU || impl->device_type == OH_AI_DEVICETYPE_GPU) { - return impl->enable_fp16; + + auto impl_device = static_cast(device_info); + if (OH_AI_DEVICETYPE_CPU == static_cast(impl_device->GetDeviceType())) { + auto impl = static_cast(device_info); + return impl->GetEnableFP16(); + } else if (OH_AI_DEVICETYPE_GPU == static_cast(impl_device->GetDeviceType())) 
{ + auto impl = static_cast(device_info); + return impl->GetEnableFP16(); + } else if (OH_AI_DEVICETYPE_NNRT == static_cast(impl_device->GetDeviceType())) { + auto impl = static_cast(device_info); + return impl->GetEnableFP16(); } else { - MS_LOG(ERROR) << "Unsupported Feature. device_type: " << impl->device_type; + MS_LOG(ERROR) << "Unsupported Feature. device_type: " << impl_device->GetDeviceType(); return false; } } @@ -217,9 +270,10 @@ void OH_AI_DeviceInfoSetFrequency(OH_AI_DeviceInfoHandle device_info, int freque MS_LOG(ERROR) << "param is nullptr."; return; } - auto impl = static_cast(device_info); - if (impl->device_type == OH_AI_DEVICETYPE_KIRIN_NPU) { - impl->frequency = frequency; + auto impl_device = static_cast(device_info); + if (static_cast(impl_device->GetDeviceType()) == OH_AI_DEVICETYPE_KIRIN_NPU) { + auto impl = static_cast(device_info); + impl->SetFrequency(frequency); } else { MS_LOG(ERROR) << "Unsupported Feature."; } @@ -230,11 +284,231 @@ int OH_AI_DeviceInfoGetFrequency(const OH_AI_DeviceInfoHandle device_info) { // MS_LOG(ERROR) << "param is nullptr."; return -1; } - auto impl = static_cast(device_info); - if (impl->device_type == OH_AI_DEVICETYPE_KIRIN_NPU) { - return impl->frequency; + auto impl_device = static_cast(device_info); + if (static_cast(impl_device->GetDeviceType()) == OH_AI_DEVICETYPE_KIRIN_NPU) { + auto impl = static_cast(device_info); + return impl->GetFrequency(); } else { MS_LOG(ERROR) << "Unsupported Feature."; return -1; } } + +NNRTDeviceDesc *OH_AI_GetAllNNRTDeviceDescs(size_t *num) { + if (num == nullptr) { + MS_LOG(ERROR) << "Input num is null"; + return nullptr; + } +#ifdef SUPPORT_NNRT + *num = 0; + + const size_t *all_device_ids; + uint32_t device_count; + auto ret = OH_NNDevice_GetAllDevicesID(&all_device_ids, &device_count); + if ((ret != OH_NN_SUCCESS) || (device_count == 0)) { + MS_LOG(ERROR) << "NNRT get all device id failed, ret: " << ret; + return nullptr; + } + + NNRTDeviceDesc *desc = (NNRTDeviceDesc *)malloc(sizeof(NNRTDeviceDesc) * device_count); + if (desc == nullptr) { + MS_LOG(ERROR) << "NNRT allocate desc failed"; + return nullptr; + } + + for (uint32_t i = 0; i < device_count; i++) { + desc[i].device_id = all_device_ids[i]; + OH_NN_DeviceType type; + (void)OH_NNDevice_GetType(all_device_ids[i], &type); + desc[i].device_type = static_cast(type); + + const char *name = nullptr; + (void)OH_NNDevice_GetName(all_device_ids[i], &name); + desc[i].device_name[127] = '\0'; + strncpy(desc[i].device_name, name, 127); + } + *num = device_count; + return desc; +#else + return nullptr; +#endif +} + +NNRTDeviceDesc *OH_AI_GetElementOfNNRTDeviceDescs(NNRTDeviceDesc *descs, size_t index) { + if (descs == nullptr) { + MS_LOG(ERROR) << "descs is null"; + return nullptr; + } + return descs + index; +} + +void OH_AI_DestroyAllNNRTDeviceDescs(NNRTDeviceDesc **desc) { + if (desc == nullptr) { + MS_LOG(WARNING) << "desc is null"; + return; + } + free(*desc); + *desc = nullptr; +} + +size_t OH_AI_GetDeviceIdFromNNRTDeviceDesc(const NNRTDeviceDesc *desc) { + if (desc == nullptr) { + MS_LOG(ERROR) << "NNRT desc is null"; + return 0; + } + return desc->device_id; +} + +const char *OH_AI_GetNameFromNNRTDeviceDesc(const NNRTDeviceDesc *desc) { + if (desc == nullptr) { + MS_LOG(ERROR) << "NNRT desc is null"; + return nullptr; + } + return desc->device_name; +} + +OH_AI_NNRTDeviceType OH_AI_GetTypeFromNNRTDeviceDesc(const NNRTDeviceDesc *desc) { + if (desc == nullptr) { + MS_LOG(ERROR) << "NNRT desc is null"; + return 
OH_AI_NNRTDeviceType::OH_AI_NNRTDEVICE_OTHERS; + } + return desc->device_type; +} + +OH_AI_DeviceInfoHandle OH_AI_CreateNNRTDeviceInfoByName(const char *name) { + size_t num = 0; + NNRTDeviceDesc *desc = OH_AI_GetAllNNRTDeviceDescs(&num); + if (desc == nullptr) { + MS_LOG(ERROR) << "Get all device desc failed"; + return nullptr; + } + + OH_AI_DeviceInfoHandle handle = nullptr; + for (size_t i = 0; i < num; i++) { + if (strncmp(desc[i].device_name, name, NNRT_DEVICE_NAME_MAX - 1) == 0) { + handle = OH_AI_DeviceInfoCreate(OH_AI_DEVICETYPE_NNRT); + OH_AI_DeviceInfoSetDeviceId(handle, desc[i].device_id); + break; + } + } + OH_AI_DestroyAllNNRTDeviceDescs(&desc); + return handle; +} + +OH_AI_DeviceInfoHandle OH_AI_CreateNNRTDeviceInfoByType(OH_AI_NNRTDeviceType type) { + size_t num = 0; + NNRTDeviceDesc *desc = OH_AI_GetAllNNRTDeviceDescs(&num); + if (desc == nullptr) { + MS_LOG(ERROR) << "Get all device desc failed"; + return nullptr; + } + + OH_AI_DeviceInfoHandle handle = nullptr; + for (size_t i = 0; i < num; i++) { + if (desc[i].device_type == type) { + handle = OH_AI_DeviceInfoCreate(OH_AI_DEVICETYPE_NNRT); + OH_AI_DeviceInfoSetDeviceId(handle, desc[i].device_id); + break; + } + } + OH_AI_DestroyAllNNRTDeviceDescs(&desc); + return handle; +} + +void OH_AI_DeviceInfoSetDeviceId(OH_AI_DeviceInfoHandle device_info, size_t device_id) { + if (device_info == nullptr) { + MS_LOG(ERROR) << "device info is null"; + return; + } + if (OH_AI_DeviceInfoGetDeviceType(device_info) != OH_AI_DEVICETYPE_NNRT) { + MS_LOG(ERROR) << "Set device_id of non-NNRT device is not allowable, ignored"; + return; + } + auto impl = reinterpret_cast(device_info); + impl->SetDeviceID(device_id); +} + +size_t OH_AI_DeviceInfoGetDeviceId(const OH_AI_DeviceInfoHandle device_info) { + if (device_info == nullptr) { + MS_LOG(ERROR) << "device info is null"; + return 0; + } + if (OH_AI_DeviceInfoGetDeviceType(device_info) != OH_AI_DEVICETYPE_NNRT) { + MS_LOG(ERROR) << "Get device_id of non-NNRT device is not allowable, ignored"; + return 0; + } + auto impl = reinterpret_cast(device_info); + return impl->GetDeviceID(); +} + +void OH_AI_DeviceInfoSetPerformanceMode(OH_AI_DeviceInfoHandle device_info, OH_AI_PerformanceMode mode) { + if (device_info == nullptr) { + MS_LOG(ERROR) << "device info is null"; + return; + } + if (OH_AI_DeviceInfoGetDeviceType(device_info) != OH_AI_DEVICETYPE_NNRT) { + MS_LOG(ERROR) << "Set performance_mode of non-NNRT device is not allowable, ignored"; + return; + } + auto impl = reinterpret_cast(device_info); + impl->SetPerformanceMode(mode); +} + +OH_AI_PerformanceMode OH_AI_DeviceInfoGetPerformanceMode(const OH_AI_DeviceInfoHandle device_info) { + if (device_info == nullptr) { + MS_LOG(ERROR) << "device info is null"; + return OH_AI_PERFORMANCE_NONE; + } + if (OH_AI_DeviceInfoGetDeviceType(device_info) != OH_AI_DEVICETYPE_NNRT) { + MS_LOG(ERROR) << "Get performance_mode of non-NNRT device is not allowable, ignored"; + return OH_AI_PERFORMANCE_NONE; + } + auto impl = reinterpret_cast(device_info); + return static_cast(impl->GetPerformanceMode()); +} + +void OH_AI_DeviceInfoSetPriority(OH_AI_DeviceInfoHandle device_info, OH_AI_Priority priority) { + if (device_info == nullptr) { + MS_LOG(ERROR) << "device info is null"; + return; + } + if (OH_AI_DeviceInfoGetDeviceType(device_info) != OH_AI_DEVICETYPE_NNRT) { + MS_LOG(ERROR) << "Set priority of non-NNRT device is not allowable, ignored"; + return; + } + auto impl = reinterpret_cast(device_info); + impl->SetPriority(priority); +} + +OH_AI_Priority 
OH_AI_DeviceInfoGetPriority(const OH_AI_DeviceInfoHandle device_info) { + if (device_info == nullptr) { + MS_LOG(ERROR) << "device info is null"; + return OH_AI_PRIORITY_NONE; + } + if (OH_AI_DeviceInfoGetDeviceType(device_info) != OH_AI_DEVICETYPE_NNRT) { + MS_LOG(ERROR) << "Get priority of non-NNRT device is not allowable, ignored"; + return OH_AI_PRIORITY_NONE; + } + auto impl = reinterpret_cast(device_info); + return static_cast(impl->GetPriority()); +} + +OH_AI_API OH_AI_Status OH_AI_DeviceInfoAddExtension(OH_AI_DeviceInfoHandle device_info, + const char *name, const char*value, size_t value_size) { + if (device_info == nullptr) { + MS_LOG(ERROR) << "device info is null"; + return OH_AI_STATUS_LITE_NULLPTR; + } + if (OH_AI_DeviceInfoGetDeviceType(device_info) != OH_AI_DEVICETYPE_NNRT) { + MS_LOG(ERROR) << "Add extension to non-NNRT device is not allowable, ignored"; + return OH_AI_STATUS_LITE_ERROR; + } + auto impl = reinterpret_cast(device_info); + mindspore::Extension extension; + extension.name = std::string(name); + extension.value = std::vector(value, value + value_size); + std::vector extension_list = impl->GetExtensions(); + extension_list.push_back(extension); + impl->SetExtensions(extension_list); + return OH_AI_STATUS_SUCCESS; +} \ No newline at end of file diff --git a/mindspore/lite/src/litert/c_api/context_c.h b/mindspore/lite/src/litert/c_api/context_c.h index 076f4d1f..dc88b8a4 100644 --- a/mindspore/lite/src/litert/c_api/context_c.h +++ b/mindspore/lite/src/litert/c_api/context_c.h @@ -21,27 +21,4 @@ #include #include "include/c_api/types_c.h" -namespace mindspore { -class Allocator; -class Delegate; - -typedef struct DeviceInfoC { - OH_AI_DeviceType device_type; - bool enable_fp16 = false; - int frequency = 3; - std::string provider; - std::string provider_device; - std::shared_ptr allocator = nullptr; -} DeviceInfoC; - -typedef struct ContextC { - std::vector> device_info_list; - int32_t thread_num = 2; - bool enable_parallel = false; - std::vector affinity_core_list; - int affinity_mode = 0; - int delegate_mode = 0; - std::shared_ptr delegate = nullptr; -} ContextC; -} // namespace mindspore #endif // MINDSPORE_LITE_SRC_RUNTIME_C_API_CONTEXT_C_H_ diff --git a/mindspore/lite/src/litert/c_api/model_c.cc b/mindspore/lite/src/litert/c_api/model_c.cc index 802df6b1..9da52d76 100644 --- a/mindspore/lite/src/litert/c_api/model_c.cc +++ b/mindspore/lite/src/litert/c_api/model_c.cc @@ -17,321 +17,135 @@ #include #include #include "include/api/context.h" +#include #include "include/api/types.h" #include "src/litert/cxx_api/tensor/tensor_impl.h" #include "src/litert/cxx_api/converters.h" -#include "src/litert/lite_session.h" -#include "src/litert/cpu_info.h" +#include "src/litert//cxx_api/model/model_impl.h" namespace mindspore { class ModelC { - public: - ModelC() : session_(nullptr), context_(nullptr) {} +public: + ModelC() : model_(nullptr) {} ~ModelC() { - for (auto &impl : tensor_map_) { - delete impl.second; + for (auto in : inputs_) { + delete in; + } + for (auto out : outputs_) { + delete out; + } + for (auto out : outputs_train_) { + delete out; } } - Status Build(const void *model_data, size_t data_size, ModelType model_type, const ContextC *model_context); - Status Build(const std::string &model_path, ModelType model_type, const ContextC *model_context); - Status Resize(const std::vector &inputs, const std::vector> &shapes); - - Status Predict(const OH_AI_TensorHandle *inputs, size_t input_num, OH_AI_TensorHandle **outputs, size_t *output_num, - const 
OH_AI_KernelCallBack &before, const OH_AI_KernelCallBack &after); - - LiteTensorImpl **GetInputs(size_t *input_num); - LiteTensorImpl **GetOutputs(size_t *output_num); + MSTensor **GetInputs(size_t *input_num); + MSTensor **GetOutputs(size_t *output_num); + mindspore::MSKernelCallBack TransCallBack(const OH_AI_KernelCallBack &oh_callback); + std::shared_ptr model_; + std::shared_ptr context_; - private: - Status RunGraph(const OH_AI_KernelCallBack &before, const OH_AI_KernelCallBack &after); - void ResetTensorData(std::vector old_data, std::vector tensors); - LiteTensorImpl *TensorToTensorImpl(mindspore::lite::Tensor *tensor); - - private: - std::shared_ptr session_ = nullptr; - std::shared_ptr context_ = nullptr; - std::map tensor_map_; - std::vector inputs_; - std::vector outputs_; - bool is_already_built = false; +private: + MSTensor **GetOutputsTensor(size_t *output_num, std::vector *vec_tensors); + std::vector inputs_; + std::vector outputs_; + std::vector outputs_train_; }; -Status ModelC::Build(const void *model_data, size_t data_size, ModelType model_type, const ContextC *model_context) { - if (is_already_built) { - MS_LOG(ERROR) << "The model is already built."; - return kLiteModelRebuild; - } - if (!PlatformInstructionSetSupportCheck()) { - MS_LOG(ERROR) << "The platform exist don't support's instruction."; - return kLiteNotSupport; - } - if(context_.get() != model_context){ - context_.reset(model_context); - } - session_ = std::make_shared(); - if (session_ == nullptr) { - MS_LOG(ERROR) << "create session failed"; - return kLiteNullptr; - } - auto ret = session_->Init(ContextUtils::Convert(model_context)); - if (ret != mindspore::lite::RET_OK) { - MS_LOG(ERROR) << "init session failed"; - return static_cast(ret); - } - ret = session_->LoadModelAndCompileByBuf(static_cast(model_data), model_type, data_size); - if (ret != RET_OK) { - MS_LOG(ERROR) << "Load and compile failed"; - return static_cast(ret); - } - is_already_built = true; - return static_cast(kSuccess); -} - -Status ModelC::Build(const std::string &model_path, ModelType model_type, const ContextC *model_context) { - if (is_already_built) { - MS_LOG(ERROR) << "The model is already built."; - return kLiteModelRebuild; - } - if (!PlatformInstructionSetSupportCheck()) { - MS_LOG(ERROR) << "The platform exist don't support's instruction."; - return kLiteNotSupport; - } - if(context_.get() != model_context){ - context_.reset(model_context); - } - session_ = std::make_shared(); - if (session_ == nullptr) { - MS_LOG(ERROR) << "create session failed"; - return kLiteNullptr; - } - auto ret = session_->Init(ContextUtils::Convert(model_context)); - if (ret != mindspore::lite::RET_OK) { - MS_LOG(ERROR) << "init session failed"; - return static_cast(ret); +MSTensor **ModelC::GetInputs(size_t *input_num) { + if (model_ == nullptr) { + MS_LOG(ERROR) << "model_ is nullptr."; + return nullptr; } - ret = session_->LoadModelAndCompileByPath(model_path, model_type); - if (ret != RET_OK) { - MS_LOG(ERROR) << "Load and compile failed"; - return static_cast(ret); + if (!inputs_.empty()) { + *input_num = inputs_.size(); + return inputs_.data(); } - is_already_built = true; - return static_cast(kSuccess); -} -Status ModelC::Resize(const std::vector &inputs, const std::vector> &shapes) { - std::vector inner_input; - size_t input_num = inputs.size(); - for (size_t i = 0; i < input_num; i++) { - auto input = inputs[i]; - if (input == nullptr || input->lite_tensor() == nullptr) { - MS_LOG(ERROR) << "Input tensor is null."; - return 
kLiteInputTensorError; + auto inputs = model_->GetInputs(); + *input_num = inputs.size(); + inputs_.resize(inputs.size(), nullptr); + for (size_t i = 0; i < inputs.size(); i++) { + inputs_[i] = new (std::nothrow) MSTensor(inputs[i].impl()); + if (inputs_[i] == nullptr) { + inputs_.clear(); + return nullptr; } - inner_input.push_back(input->lite_tensor()); } - size_t shape_num = shapes.size(); - std::vector> inner_shapes(shape_num); - for (size_t i = 0; i < shape_num; i++) { - std::transform(shapes[i].begin(), shapes[i].end(), std::back_inserter(inner_shapes[i]), - [](int64_t value) { return static_cast(value); }); - } - if (session_ == nullptr) { - MS_LOG(ERROR) << "Session implement is null."; - return kLiteNullptr; - } - auto ret = session_->Resize(inner_input, inner_shapes); - return static_cast(ret); + return inputs_.data(); } -void ModelC::ResetTensorData(std::vector old_data, std::vector tensors) { - for (size_t j = 0; j < old_data.size(); j++) { - tensors.at(j)->set_data(old_data.at(j)); +MSTensor **ModelC::GetOutputs(size_t *output_num) { + if (model_->GetTrainMode() == true) { + return GetOutputsTensor(output_num, &outputs_train_); + } else { + return GetOutputsTensor(output_num, &outputs_); } } -Status ModelC::Predict(const OH_AI_TensorHandle *inputs, size_t input_num, OH_AI_TensorHandle **outputs, - size_t *output_num, const OH_AI_KernelCallBack &before, const OH_AI_KernelCallBack &after) { - if (outputs == nullptr || session_ == nullptr) { - MS_LOG(ERROR) << "param is nullptr."; - return kLiteError; +MSTensor **ModelC::GetOutputsTensor(size_t *output_num, std::vector *vec_tensors) { + if (model_ == nullptr) { + MS_LOG(ERROR) << "model_ is nullptr."; + return nullptr; } - auto model_inputs = session_->GetInputs(); - if (model_inputs.size() != input_num) { - MS_LOG(ERROR) << "Wrong input size."; - return kLiteError; + if (!vec_tensors->empty()) { + *output_num = vec_tensors->size(); + return vec_tensors->data(); } - std::vector old_data; - for (size_t i = 0; i < input_num; i++) { - auto real_input = model_inputs[i]; - auto user_input = static_cast(inputs[i]); - if (user_input->DataType() != static_cast(real_input->data_type())) { - ResetTensorData(old_data, model_inputs); - MS_LOG(ERROR) << "DataType does not match, input:" << user_input->Name() - << ", real:" << real_input->tensor_name(); - return kLiteInputTensorError; - } - if (user_input->Data() == nullptr) { - ResetTensorData(old_data, model_inputs); - MS_LOG(ERROR) << "Tensor " << user_input->Name() << " has no data."; - return kLiteInputTensorError; - } - // GPU tensor can't manipulate CPU memory which the user provides. - // When model input is GPU tensor and user input is NOT GPU data, - // just free model input's data for late GPU Tensor filling. - if (IS_OPENCL_ALLOCATOR(real_input->allocator()) && (!IS_OPENCL_ALLOCATOR(user_input->GetAllocator()))) { - real_input->FreeData(); - } - old_data.push_back(real_input->data()); // Save original data in model tensors. 
- - if (real_input->data_type() == kObjectTypeString) { - std::vector shape; - std::transform(user_input->Shape().begin(), user_input->Shape().end(), std::back_inserter(shape), - [](int64_t value) { return static_cast(value); }); - real_input->set_shape(shape); - real_input->set_data(user_input->MutableData()); - } else { - if (user_input->MutableData() != real_input->data()) { - if (real_input->Size() != user_input->DataSize()) { - ResetTensorData(old_data, model_inputs); - MS_LOG(ERROR) << "Tensor " << user_input->Name() << " has wrong data size."; - return kLiteInputTensorError; - } - if (!IS_OPENCL_ALLOCATOR(real_input->allocator())) { - real_input->set_data(user_input->MutableData()); - } else { - // Use outside CPU data to fill GPU Tensor. - auto dst_data = real_input->MutableData(); - auto src_data = user_input->MutableData(); - (void)memcpy(dst_data, src_data, real_input->Size()); - } - } - } - } - auto ret = RunGraph(before, after); - ResetTensorData(old_data, model_inputs); - if (ret != kSuccess) { - MS_LOG(ERROR) << "Run graph failed."; - return ret; - } - - *outputs = reinterpret_cast(GetOutputs(output_num)); - return kSuccess; -} - -Status ModelC::RunGraph(const OH_AI_KernelCallBack &before, const OH_AI_KernelCallBack &after) { - KernelCallBack before_call_back = nullptr; - KernelCallBack after_call_back = nullptr; - if (before != nullptr) { - before_call_back = [&](const std::vector &before_inputs, - const std::vector &before_outputs, - const MSCallBackParam &call_param) { - std::vector inputs_impl; - std::vector outputs_impl; - std::vector op_inputs; - std::vector op_outputs; - size_t op_input_num = before_inputs.size(); - for (size_t i = 0; i < op_input_num; i++) { - inputs_impl.emplace_back(before_inputs[i]); - op_inputs.push_back(&(inputs_impl.back())); - } - size_t op_output_num = before_outputs.size(); - for (size_t i = 0; i < op_output_num; i++) { - outputs_impl.emplace_back(before_outputs[i]); - op_outputs.push_back(&(outputs_impl.back())); - } - const OH_AI_CallBackParam op_info = {const_cast(call_param.node_name.c_str()), - const_cast(call_param.node_type.c_str())}; - OH_AI_TensorHandleArray inputs = {op_input_num, op_inputs.data()}; - OH_AI_TensorHandleArray outputs = {op_output_num, op_outputs.data()}; - return before(inputs, outputs, op_info); - }; - } - if (after != nullptr) { - after_call_back = [&](const std::vector &after_inputs, - const std::vector &after_outputs, - const MSCallBackParam &call_param) { - std::vector inputs_impl; - std::vector outputs_impl; - std::vector op_inputs; - std::vector op_outputs; - size_t op_input_num = after_inputs.size(); - for (size_t i = 0; i < op_input_num; i++) { - inputs_impl.emplace_back(after_inputs[i]); - op_inputs.push_back(&(inputs_impl.back())); - } - size_t op_output_num = after_outputs.size(); - for (size_t i = 0; i < op_output_num; i++) { - outputs_impl.emplace_back(after_outputs[i]); - op_outputs.push_back(&(outputs_impl.back())); - } - const OH_AI_CallBackParam op_info = {const_cast(call_param.node_name.c_str()), - const_cast(call_param.node_type.c_str())}; - OH_AI_TensorHandleArray inputs = {op_input_num, op_inputs.data()}; - OH_AI_TensorHandleArray outputs = {op_output_num, op_outputs.data()}; - return after(inputs, outputs, op_info); - }; - } - auto ret = session_->RunGraph(before_call_back, after_call_back); - return static_cast(ret); -} - -LiteTensorImpl *ModelC::TensorToTensorImpl(mindspore::lite::Tensor *tensor) { - LiteTensorImpl *impl = nullptr; - auto iter = tensor_map_.find(tensor); - if (iter != 
tensor_map_.end()) { - impl = iter->second; - } else { - impl = new (std::nothrow) LiteTensorImpl(tensor); - if (impl == nullptr || impl->lite_tensor() == nullptr) { - MS_LOG(ERROR) << "Create tensor failed."; + auto outputs = model_->GetOutputs(); + *output_num = outputs.size(); + vec_tensors->resize(outputs.size(), nullptr); + for (size_t i = 0; i < outputs.size(); i++) { + (*vec_tensors)[i] = new (std::nothrow) MSTensor(outputs[i].impl()); + if ((*vec_tensors)[i] == nullptr) { + vec_tensors->clear(); return nullptr; } - tensor_map_[tensor] = impl; } - return impl; + return vec_tensors->data(); } -LiteTensorImpl **ModelC::GetInputs(size_t *input_num) { - if (session_ == nullptr || input_num == nullptr) { - MS_LOG(ERROR) << "Session is null."; - return nullptr; - } - auto inputs = session_->GetInputs(); - *input_num = inputs.size(); - if (inputs_.capacity() < *input_num) { - inputs_.reserve(*input_num); - } - inputs_.clear(); - std::transform(inputs.begin(), inputs.end(), std::back_inserter(inputs_), - [&](lite::Tensor *input) { return TensorToTensorImpl(input); }); - return inputs_.data(); -} +mindspore::MSKernelCallBack ModelC::TransCallBack(const OH_AI_KernelCallBack &oh_callback) { + mindspore::MSKernelCallBack call_back = nullptr; + if (oh_callback != nullptr) { + call_back = [&](const std::vector &inputs, + const std::vector &outputs, + const mindspore::MSCallBackParam &opInfo) { + std::vector vec_inputs; + std::vector vec_outputs; + OH_AI_CallBackParam op_info = {const_cast(opInfo.node_name.c_str()), + const_cast(opInfo.node_type.c_str())}; + size_t inputs_handle_num = inputs.size(); + for (size_t i = 0; i < inputs_handle_num; i++) { + vec_inputs.push_back( + static_cast(&(static_cast>(inputs)[i]))); + } + size_t outputs_handle_num = outputs.size(); + for (size_t i = 0; i < outputs_handle_num; i++) { + vec_outputs.push_back( + static_cast(&(static_cast>(outputs)[i]))); + } -LiteTensorImpl **ModelC::GetOutputs(size_t *output_num) { - if (session_ == nullptr || output_num == nullptr) { - MS_LOG(ERROR) << "Session is null."; - return nullptr; - } - auto outputs = session_->GetOutputs(); - *output_num = outputs.size(); - if (outputs_.capacity() < *output_num) { - outputs_.reserve(*output_num); + OH_AI_TensorHandleArray handle_inputs = {inputs_handle_num, vec_inputs.data()}; + OH_AI_TensorHandleArray handle_outputs = {outputs_handle_num, vec_outputs.data()}; + return oh_callback(handle_inputs, handle_outputs, op_info); + }; } - outputs_.clear(); - std::transform(outputs.begin(), outputs.end(), std::back_inserter(outputs_), - [&](std::unordered_map::value_type iter) { - return TensorToTensorImpl(iter.second); - }); - return outputs_.data(); + return call_back; } } // namespace mindspore OH_AI_ModelHandle OH_AI_ModelCreate() { auto impl = new (std::nothrow) mindspore::ModelC(); if (impl == nullptr) { - MS_LOG(ERROR) << "Model implement is null."; + MS_LOG(ERROR) << "Model implement is nullptr."; + return nullptr; + } + impl->model_ = std::make_shared(); + if (impl->model_ == nullptr) { + MS_LOG(ERROR) << "model_ is nullptr."; + delete impl; return nullptr; } return static_cast(impl); @@ -358,55 +172,59 @@ size_t OH_AI_ModelCalcWorkspaceSize(OH_AI_ModelHandle model) { OH_AI_Status OH_AI_ModelBuild(OH_AI_ModelHandle model, const void *model_data, size_t data_size, OH_AI_ModelType model_type, const OH_AI_ContextHandle model_context) { if (model == nullptr || model_data == nullptr || model_context == nullptr) { - MS_LOG(ERROR) << "param is nullptr."; + MS_LOG(ERROR) << 
"model/model_data/model_context is nullptr."; return OH_AI_STATUS_LITE_NULLPTR; } if (model_type == OH_AI_MODELTYPE_INVALID) { - MS_LOG(ERROR) << "param is invalid."; + MS_LOG(ERROR) << "model_type is invalid."; return OH_AI_STATUS_LITE_PARAM_INVALID; } - mindspore::ContextC *context = static_cast(model_context); + mindspore::Context *context = static_cast(model_context); auto impl = static_cast(model); - auto ret = impl->Build(model_data, data_size, static_cast(model_type), context); + if (impl->context_.get() != context) { + impl->context_.reset(context); + } + auto ret = impl->model_->Build(model_data, data_size, static_cast(model_type), impl->context_); return static_cast(ret.StatusCode()); } OH_AI_Status OH_AI_ModelBuildFromFile(OH_AI_ModelHandle model, const char *model_path, OH_AI_ModelType model_type, const OH_AI_ContextHandle model_context) { if (model == nullptr || model_path == nullptr || model_context == nullptr) { - MS_LOG(ERROR) << "param is nullptr."; + MS_LOG(ERROR) << "model/model_path/model_context is nullptr."; return OH_AI_STATUS_LITE_NULLPTR; } if (model_type == OH_AI_MODELTYPE_INVALID) { - MS_LOG(ERROR) << "param is invalid."; + MS_LOG(ERROR) << "model_type is invalid."; return OH_AI_STATUS_LITE_PARAM_INVALID; } - mindspore::ContextC *context = static_cast(model_context); + mindspore::Context *context = static_cast(model_context); auto impl = static_cast(model); - auto ret = impl->Build(model_path, static_cast(model_type), context); + if (impl->context_.get() != context) { + impl->context_.reset(context); + } + auto ret = impl->model_->Build(model_path, static_cast(model_type), impl->context_); return static_cast(ret.StatusCode()); } OH_AI_Status OH_AI_ModelResize(OH_AI_ModelHandle model, const OH_AI_TensorHandleArray inputs, OH_AI_ShapeInfo *shape_infos, size_t shape_info_num) { if (model == nullptr || shape_infos == nullptr) { - MS_LOG(ERROR) << "param is nullptr."; + MS_LOG(ERROR) << "model/shape_infos is nullptr."; return OH_AI_STATUS_LITE_NULLPTR; } - std::vector vec_inputs; - std::transform(inputs.handle_list, inputs.handle_list + inputs.handle_num, std::back_inserter(vec_inputs), - [](OH_AI_TensorHandle value) { return static_cast(value); }); + std::vector vec_inputs; + for (size_t i = 0; i < inputs.handle_num; ++i) { + vec_inputs.push_back(*static_cast(inputs.handle_list[i])); + } + std::vector> vec_dims; for (size_t i = 0; i < shape_info_num; i++) { std::vector shape(shape_infos[i].shape, shape_infos[i].shape + shape_infos[i].shape_num); - if (std::any_of(shape.begin(), shape.end(), [](int64_t val) { return val < 0 || val > INT32_MAX; })) { - MS_LOG(ERROR) << "Invalid shape: " << shape << ", each dimension must be in [0, INT32_MAX]"; - return OH_AI_STATUS_LITE_PARAM_INVALID; - } vec_dims.push_back(shape); } auto impl = static_cast(model); - auto ret = impl->Resize(vec_inputs, vec_dims); + auto ret = impl->model_->Resize(vec_inputs, vec_dims); return static_cast(ret.StatusCode()); } @@ -414,15 +232,25 @@ OH_AI_Status OH_AI_ModelPredict(OH_AI_ModelHandle model, const OH_AI_TensorHandl OH_AI_TensorHandleArray *outputs, const OH_AI_KernelCallBack before, const OH_AI_KernelCallBack after) { if (model == nullptr) { - MS_LOG(ERROR) << "param is nullptr."; + MS_LOG(ERROR) << "model is nullptr."; return OH_AI_STATUS_LITE_NULLPTR; } + std::vector ms_tensor_inputs; + for (size_t i = 0; i < inputs.handle_num; i++) { + auto user_input = static_cast(inputs.handle_list[i]); + ms_tensor_inputs.push_back(*user_input); + } + auto impl = static_cast(model); - auto ret = 
impl->Predict(inputs.handle_list, inputs.handle_num, &(outputs->handle_list), &(outputs->handle_num), - before, after); + mindspore::MSKernelCallBack before_call_back = impl->TransCallBack(before); + mindspore::MSKernelCallBack after_call_back = impl->TransCallBack(after); + + std::vector ms_tensor_outputs; + auto ret = impl->model_->Predict(ms_tensor_inputs, &ms_tensor_outputs, before_call_back, after_call_back); if (!ret.IsOk()) { MS_LOG(ERROR) << "Predict fail, ret :" << ret; } + outputs->handle_list = reinterpret_cast(impl->GetOutputs(&outputs->handle_num)); return static_cast(ret.StatusCode()); } @@ -431,11 +259,6 @@ OH_AI_Status OH_AI_ModelRunStep(OH_AI_ModelHandle model, const OH_AI_KernelCallB return OH_AI_STATUS_LITE_NOT_SUPPORT; } -OH_AI_Status OH_AI_ModelSetTrainMode(const OH_AI_ModelHandle model, bool train) { - MS_LOG(ERROR) << "Unsupported Feature."; - return OH_AI_STATUS_LITE_NOT_SUPPORT; -} - OH_AI_Status OH_AI_ModelExportWeight(const OH_AI_ModelHandle model, const char *export_path) { MS_LOG(ERROR) << "Unsupported Feature."; return OH_AI_STATUS_LITE_NOT_SUPPORT; @@ -443,7 +266,7 @@ OH_AI_Status OH_AI_ModelExportWeight(const OH_AI_ModelHandle model, const char * OH_AI_TensorHandleArray OH_AI_ModelGetInputs(const OH_AI_ModelHandle model) { if (model == nullptr) { - MS_LOG(ERROR) << "param is nullptr."; + MS_LOG(ERROR) << "model is nullptr."; return {0, nullptr}; } auto impl = static_cast(model); @@ -454,7 +277,7 @@ OH_AI_TensorHandleArray OH_AI_ModelGetInputs(const OH_AI_ModelHandle model) { OH_AI_TensorHandleArray OH_AI_ModelGetOutputs(const OH_AI_ModelHandle model) { if (model == nullptr) { - MS_LOG(ERROR) << "param is nullptr."; + MS_LOG(ERROR) << "model is nullptr."; return {0, nullptr}; } auto impl = static_cast(model); @@ -465,7 +288,7 @@ OH_AI_TensorHandleArray OH_AI_ModelGetOutputs(const OH_AI_ModelHandle model) { OH_AI_TensorHandle OH_AI_ModelGetInputByTensorName(const OH_AI_ModelHandle model, const char *tensor_name) { if (model == nullptr || tensor_name == nullptr) { - MS_LOG(ERROR) << "param is nullptr."; + MS_LOG(ERROR) << "model/tensor_name is nullptr."; return nullptr; } auto impl = static_cast(model); @@ -482,7 +305,7 @@ OH_AI_TensorHandle OH_AI_ModelGetInputByTensorName(const OH_AI_ModelHandle model OH_AI_TensorHandle OH_AI_ModelGetOutputByTensorName(const OH_AI_ModelHandle model, const char *tensor_name) { if (model == nullptr || tensor_name == nullptr) { - MS_LOG(ERROR) << "param is nullptr."; + MS_LOG(ERROR) << "model/tensor_name is nullptr."; return nullptr; } auto impl = static_cast(model); @@ -496,3 +319,294 @@ OH_AI_TensorHandle OH_AI_ModelGetOutputByTensorName(const OH_AI_ModelHandle mode MS_LOG(ERROR) << "tensor is not exist."; return nullptr; } + +OH_AI_TrainCfgHandle OH_AI_TrainCfgCreate() { + auto impl = new (std::nothrow) mindspore::TrainCfg(); + if (impl == nullptr) { + MS_LOG(ERROR) << "TrainCfg implement is nullptr."; + return nullptr; + } + return static_cast(impl); +} + +void OH_AI_TrainCfgDestroy(OH_AI_TrainCfgHandle *train_cfg) { + if (train_cfg != nullptr && *train_cfg != nullptr) { + auto impl = static_cast(*train_cfg); + delete impl; + *train_cfg = nullptr; + } +} + +char **OH_AI_TrainCfgGetLossName(OH_AI_TrainCfgHandle train_cfg, size_t *num) { + if (train_cfg == nullptr || num == nullptr) { + MS_LOG(ERROR) << "train_cfg/num is nullptr."; + return nullptr; + } + auto impl = static_cast(train_cfg); + auto loss_name = impl->GetLossName(); + *num = loss_name.size(); + char **name = static_cast(malloc(loss_name.size())); + if (name == 
nullptr) { + MS_LOG(ERROR) << "Failed to malloc loss_name."; + return nullptr; + } + for (size_t i = 0; i < loss_name.size(); i++) { + name[i] = static_cast(malloc(loss_name[i].size() + 1)); + strcpy(name[i], loss_name[i].c_str()); + } + return name; +} + +void OH_AI_TrainCfgSetLossName(OH_AI_TrainCfgHandle train_cfg, const char **loss_name, size_t num) { + if (train_cfg == nullptr) { + MS_LOG(ERROR) << "train_cfg is nullptr."; + return; + } + auto impl = static_cast(train_cfg); + std::vector vec_name; + for (size_t i = 0; i < num; i++) { + vec_name.push_back(loss_name[i]); + } + impl->SetLossName(vec_name); +} + +OH_AI_OptimizationLevel OH_AI_TrainCfgGetOptimizationLevel(OH_AI_TrainCfgHandle train_cfg) { + if (train_cfg == nullptr) { + MS_LOG(ERROR) << "train_cfg is nullptr."; + return OH_AI_KO0; + } + auto impl = static_cast(train_cfg); + return static_cast(impl->optimization_level_); +} + +void OH_AI_TrainCfgSetOptimizationLevel(OH_AI_TrainCfgHandle train_cfg, OH_AI_OptimizationLevel level) { + if (train_cfg == nullptr) { + MS_LOG(ERROR) << "train_cfg is nullptr."; + return; + } + auto impl = static_cast(train_cfg); + impl->optimization_level_ = static_cast(level); +} + +OH_AI_Status OH_AI_TrainModelBuild(OH_AI_ModelHandle model, const void *model_data, size_t data_size, + OH_AI_ModelType model_type, const OH_AI_ContextHandle model_context, + const OH_AI_TrainCfgHandle train_cfg) { + if (model == nullptr || model_data == nullptr || model_context == nullptr) { + MS_LOG(ERROR) << "model/model_data/model_context is nullptr."; + return OH_AI_STATUS_LITE_NULLPTR; + } + if (model_type == OH_AI_MODELTYPE_INVALID) { + MS_LOG(ERROR) << "model_type is invalid."; + return OH_AI_STATUS_LITE_PARAM_INVALID; + } + auto impl = static_cast(model); + + mindspore::Graph graph; + auto status = mindspore::Serialization::Load(model_data, data_size, static_cast(model_type), &graph); + if (status != mindspore::kSuccess) { + MS_LOG(ERROR) << "load ms file failed."; + return OH_AI_STATUS_LITE_ERROR; + } + auto context = static_cast(model_context); + auto build_train_cfg = static_cast(train_cfg); + if (impl->context_.get() != context) { + impl->context_.reset(context); + } + auto ret = impl->model_->Build(static_cast(graph), impl->context_, + std::shared_ptr(build_train_cfg)); + if (ret != mindspore::kSuccess) { + MS_LOG(ERROR) << "Load and compile failed"; + } + return static_cast(ret.StatusCode()); +} + +OH_AI_Status OH_AI_TrainModelBuildFromFile(OH_AI_ModelHandle model, const char *model_path, + OH_AI_ModelType model_type, const OH_AI_ContextHandle model_context, + const OH_AI_TrainCfgHandle train_cfg) { + if (model == nullptr || model_path == nullptr || model_context == nullptr) { + MS_LOG(ERROR) << "model/model_path/model_context is nullptr."; + return OH_AI_STATUS_LITE_NULLPTR; + } + if (model_type == OH_AI_MODELTYPE_INVALID) { + MS_LOG(ERROR) << "model_type is invalid."; + return OH_AI_STATUS_LITE_PARAM_INVALID; + } + auto impl = static_cast(model); + + mindspore::Graph graph; + auto status = mindspore::Serialization::Load(model_path, static_cast(model_type), &graph); + if (status != mindspore::kSuccess) { + MS_LOG(ERROR) << "load ms file failed. 
" << model_path; + return OH_AI_STATUS_LITE_ERROR; + } + auto context = static_cast(model_context); + auto build_train_cfg = static_cast(train_cfg); + if (impl->context_.get() != context) { + impl->context_.reset(context); + } + auto ret = impl->model_->Build(static_cast(graph), impl->context_, + std::shared_ptr(build_train_cfg)); + if (ret != mindspore::kSuccess) { + MS_LOG(ERROR) << "Load and compile failed"; + } + return static_cast(ret.StatusCode()); +} + +OH_AI_Status OH_AI_ModelSetLearningRate(OH_AI_ModelHandle model, float learning_rate) { + if (model == nullptr) { + MS_LOG(ERROR) << "model is nullptr."; + return OH_AI_STATUS_LITE_PARAM_INVALID; + } + auto impl = static_cast(model); + auto ret = impl->model_->SetLearningRate(learning_rate); + return static_cast(ret.StatusCode()); +} + +float OH_AI_ModelGetLearningRate(OH_AI_ModelHandle model) { + if (model == nullptr) { + MS_LOG(ERROR) << "model is nullptr."; + return OH_AI_STATUS_LITE_PARAM_INVALID; + } + auto impl = static_cast(model); + return impl->model_->GetLearningRate(); +} + +OH_AI_Status OH_AI_RunStep(OH_AI_ModelHandle model, const OH_AI_KernelCallBack before, const OH_AI_KernelCallBack after) { + if (model == nullptr) { + MS_LOG(ERROR) << "model is nullptr."; + return OH_AI_STATUS_LITE_PARAM_INVALID; + } + auto impl = static_cast(model); + auto ret = impl->model_->RunStep(impl->TransCallBack(before), impl->TransCallBack(after)); + return static_cast(ret.StatusCode()); +} + +OH_AI_TensorHandleArray OH_AI_ModelGetWeights(OH_AI_ModelHandle model) { + if (model == nullptr) { + MS_LOG(ERROR) << "model is nullptr."; + return {0, nullptr}; + } + auto impl = static_cast(model); + auto features = impl->model_->GetFeatureMaps(); + size_t handle_num = features.size(); + + mindspore::MSTensor **handle_list = static_cast(malloc( + handle_num * sizeof(mindspore::MSTensor *))); + if (handle_list == nullptr) { + MS_LOG(ERROR) << "Failed to malloc handle_list."; + return {0, nullptr}; + } + for (size_t i = 0; i < handle_num; i++) { + handle_list[i] = new mindspore::MSTensor(features[i].impl()); + } + return {handle_num, reinterpret_cast(handle_list)}; +} + +OH_AI_Status OH_AI_ModelUpdateWeights(OH_AI_ModelHandle model, const OH_AI_TensorHandleArray new_weights) { + if (model == nullptr) { + MS_LOG(ERROR) << "model is nullptr."; + return OH_AI_STATUS_LITE_PARAM_INVALID; + } + auto impl = static_cast(model); + std::vector weights; + for (size_t i = 0; i < new_weights.handle_num; i++) { + weights.push_back(*static_cast(new_weights.handle_list[i])); + } + auto ret = impl->model_->UpdateWeights(weights); + return static_cast(ret.StatusCode()); +} + +bool OH_AI_ModelGetTrainMode(OH_AI_ModelHandle model) { + if (model == nullptr) { + MS_LOG(ERROR) << "model is nullptr."; + return false; + } + auto impl = static_cast(model); + return impl->model_->GetTrainMode(); +} + +OH_AI_Status OH_AI_ModelSetTrainMode(OH_AI_ModelHandle model, bool train) { + if (model == nullptr) { + MS_LOG(ERROR) << "model is nullptr."; + return OH_AI_STATUS_LITE_PARAM_INVALID; + } + auto impl = static_cast(model); + auto ret = impl->model_->SetTrainMode(train); + return static_cast(ret.StatusCode()); +} + +OH_AI_Status OH_AI_ModelSetupVirtualBatch(OH_AI_ModelHandle model, int virtual_batch_multiplier, float lr, float momentum) { + if (model == nullptr) { + MS_LOG(ERROR) << "model is nullptr."; + return OH_AI_STATUS_LITE_PARAM_INVALID; + } + auto impl = static_cast(model); + auto ret = impl->model_->SetupVirtualBatch(virtual_batch_multiplier, lr, momentum); + return 
static_cast(ret.StatusCode()); +} + +OH_AI_Status OH_AI_ExportModel(OH_AI_ModelHandle model, OH_AI_ModelType model_type, const char *model_file, + OH_AI_QuantizationType quantization_type, bool export_inference_only, + char **output_tensor_name, size_t num) { + if (model == nullptr) { + MS_LOG(ERROR) << "model is nullptr."; + return OH_AI_STATUS_LITE_PARAM_INVALID; + } + auto impl = static_cast(model); + std::vector tensor_name; + for (size_t i = 0; i < num; i++) { + tensor_name.push_back(output_tensor_name[i]); + } + auto ret = mindspore::Serialization::ExportModel(*(impl->model_.get()), static_cast(model_type), + model_file, + static_cast(quantization_type), + export_inference_only, tensor_name); + if (!ret.IsOk()) { + MS_LOG(ERROR) << "export model fail, ret :" << ret; + } + return static_cast(ret.StatusCode()); +} + +OH_AI_Status OH_AI_ExportModelBuffer(OH_AI_ModelHandle model, OH_AI_ModelType model_type, char **model_data, + size_t *data_size, OH_AI_QuantizationType quantization_type, + bool export_inference_only, char **output_tensor_name, size_t num) { + if (model == nullptr) { + MS_LOG(ERROR) << "model is nullptr."; + return OH_AI_STATUS_LITE_PARAM_INVALID; + } + auto impl = static_cast(model); + std::vector tensor_name; + for (size_t i = 0; i < num; i++) { + tensor_name.push_back(output_tensor_name[i]); + } + mindspore::Buffer buffer; + auto ret = mindspore::Serialization::ExportModel(*(impl->model_.get()), static_cast(model_type), + &buffer, static_cast(quantization_type), + export_inference_only, tensor_name); + auto data = static_cast(buffer.MutableData()); + *model_data = (char *) malloc(buffer.DataSize()); + *data_size = buffer.DataSize(); + memcpy(*model_data, data, buffer.DataSize()); + if (!ret.IsOk()) { + MS_LOG(ERROR) << "export model fail, ret :" << ret; + } + return static_cast(ret.StatusCode()); +} + +OH_AI_Status OH_AI_ExportWeightsCollaborateWithMicro(OH_AI_ModelHandle model, OH_AI_ModelType model_type, const char *weight_file, + bool is_inference, bool enable_fp16, char **changeable_weights_name, size_t num) { + if (model == nullptr) { + MS_LOG(ERROR) << "model is nullptr."; + return OH_AI_STATUS_LITE_PARAM_INVALID; + } + auto impl = static_cast(model); + std::vector weights_name; + for (size_t i = 0; i < num; i++) { + weights_name.push_back(changeable_weights_name[i]); + } + auto ret = mindspore::Serialization::ExportWeightsCollaborateWithMicro(*(impl->model_.get()), static_cast(model_type), weight_file, is_inference, enable_fp16, weights_name); + if (!ret.IsOk()) { + MS_LOG(ERROR) << "export model fail, ret :" << ret; + } + return static_cast(ret.StatusCode()); +} diff --git a/mindspore/lite/src/litert/c_api/tensor_c.cc b/mindspore/lite/src/litert/c_api/tensor_c.cc index 7b5c4c2f..4b1e6aff 100644 --- a/mindspore/lite/src/litert/c_api/tensor_c.cc +++ b/mindspore/lite/src/litert/c_api/tensor_c.cc @@ -17,7 +17,6 @@ #include "include/api/status.h" #include "src/tensor.h" #include "src/litert/cxx_api/tensor/tensor_impl.h" -#include "src/litert/inner_allocator.h" OH_AI_TensorHandle OH_AI_TensorCreate(const char *name, OH_AI_DataType type, const int64_t *shape, size_t shape_num, const void *data, size_t data_len) { @@ -31,18 +30,23 @@ OH_AI_TensorHandle OH_AI_TensorCreate(const char *name, OH_AI_DataType type, con } auto lite_tensor = mindspore::lite::Tensor::CreateTensor(name, static_cast(type), vec_shape, data, data_len); - auto impl = new (std::nothrow) mindspore::LiteTensorImpl(lite_tensor); - if (impl == nullptr || impl->lite_tensor() == nullptr) { + auto 
lite_tensor_impl = std::make_shared(lite_tensor); + if (lite_tensor_impl == nullptr || lite_tensor_impl->lite_tensor() == nullptr) { MS_LOG(ERROR) << "Failed to allocate tensor impl."; return nullptr; } - impl->set_from_session(false); + lite_tensor_impl->set_from_session(false); + auto impl = new (std::nothrow) mindspore::MSTensor(lite_tensor_impl); + if (impl == nullptr) { + MS_LOG(ERROR) << "Failed to allocate MSTensor."; + return nullptr; + } return impl; } void OH_AI_TensorDestroy(OH_AI_TensorHandle *tensor) { if (tensor != nullptr && *tensor != nullptr) { - auto impl = static_cast(*tensor); + auto impl = static_cast(*tensor); delete impl; *tensor = nullptr; } @@ -53,20 +57,14 @@ OH_AI_TensorHandle OH_AI_TensorClone(OH_AI_TensorHandle tensor) { MS_LOG(ERROR) << "param is nullptr."; return nullptr; } - auto impl = static_cast(tensor); - auto lite_tensor = static_cast(impl->lite_tensor()); - auto clone = mindspore::lite::Tensor::CopyTensor(*lite_tensor, true, lite_tensor->allocator()); - if (clone == nullptr) { - MS_LOG(ERROR) << "Failed to allocate tensor."; - return nullptr; - } - auto clone_impl = new (std::nothrow) mindspore::LiteTensorImpl(clone); + auto impl = static_cast(tensor); + auto clone_impl = impl->Clone(); if (clone_impl == nullptr) { - delete clone; MS_LOG(ERROR) << "Failed to allocate tensor impl."; return nullptr; } - clone_impl->set_from_session(false); + std::static_pointer_cast(clone_impl->impl())->set_own_data(false); + clone_impl->SetTensorName(impl->Name() + "_duplicate"); return clone_impl; } @@ -75,8 +73,8 @@ void OH_AI_TensorSetName(OH_AI_TensorHandle tensor, const char *name) { MS_LOG(ERROR) << "param is nullptr."; return; } - auto impl = static_cast(tensor); - impl->SetName(name); + auto impl = static_cast(tensor); + impl->SetTensorName(name); } const char *OH_AI_TensorGetName(const OH_AI_TensorHandle tensor) { @@ -84,8 +82,8 @@ const char *OH_AI_TensorGetName(const OH_AI_TensorHandle tensor) { MS_LOG(ERROR) << "param is nullptr."; return nullptr; } - auto impl = static_cast(tensor); - return impl->Name().c_str(); + auto ms_tensor = static_cast(tensor); + return std::static_pointer_cast(ms_tensor->impl())->Name().c_str(); } void OH_AI_TensorSetDataType(OH_AI_TensorHandle tensor, OH_AI_DataType type) { @@ -93,7 +91,7 @@ void OH_AI_TensorSetDataType(OH_AI_TensorHandle tensor, OH_AI_DataType type) { MS_LOG(ERROR) << "param is nullptr."; return; } - auto impl = static_cast(tensor); + auto impl = static_cast(tensor); impl->SetDataType(static_cast(type)); } @@ -102,7 +100,7 @@ OH_AI_DataType OH_AI_TensorGetDataType(const OH_AI_TensorHandle tensor) { MS_LOG(ERROR) << "param is nullptr."; return OH_AI_DATATYPE_UNKNOWN; } - auto impl = static_cast(tensor); + auto impl = static_cast(tensor); auto dtype = impl->DataType(); return static_cast(dtype); } @@ -112,7 +110,7 @@ void OH_AI_TensorSetShape(OH_AI_TensorHandle tensor, const int64_t *shape, size_ MS_LOG(ERROR) << "param is nullptr."; return; } - auto impl = static_cast(tensor); + auto impl = static_cast(tensor); std::vector vec_shape(shape_num); for (size_t i = 0; i < shape_num; i++) { vec_shape[i] = shape[i]; @@ -125,7 +123,7 @@ const int64_t *OH_AI_TensorGetShape(const OH_AI_TensorHandle tensor, size_t *sha MS_LOG(ERROR) << "param is nullptr."; return nullptr; } - auto impl = static_cast(tensor); + auto impl = static_cast(tensor); *shape_num = impl->Shape().size(); return impl->Shape().data(); } @@ -135,7 +133,7 @@ void OH_AI_TensorSetFormat(OH_AI_TensorHandle tensor, OH_AI_Format format) { MS_LOG(ERROR) << 
"param is nullptr."; return; } - auto impl = static_cast(tensor); + auto impl = static_cast(tensor); return impl->SetFormat(static_cast(format)); } @@ -144,8 +142,8 @@ OH_AI_Format OH_AI_TensorGetFormat(const OH_AI_TensorHandle tensor) { MS_LOG(ERROR) << "param is nullptr."; return OH_AI_FORMAT_NHWC; } - auto impl = static_cast(tensor); - return static_cast(impl->Format()); + auto impl = static_cast(tensor); + return static_cast(impl->format()); } void OH_AI_TensorSetData(OH_AI_TensorHandle tensor, void *data) { @@ -153,16 +151,34 @@ void OH_AI_TensorSetData(OH_AI_TensorHandle tensor, void *data) { MS_LOG(ERROR) << "param is nullptr."; return; } - auto impl = static_cast(tensor); + auto impl = static_cast(tensor); return impl->SetData(data, true); } +OH_AI_Status OH_AI_TensorSetUserData(OH_AI_TensorHandle tensor, void *data, size_t data_size) { + if (tensor == nullptr) { + MS_LOG(ERROR) << "param is nullptr."; + return OH_AI_STATUS_LITE_NULLPTR; + } + + auto impl = static_cast(tensor); + if ((impl->DataSize() > 0) && (data_size != impl->DataSize())) { + MS_LOG(ERROR) << "input data size does not match inner data size"; + return OH_AI_STATUS_LITE_PARAM_INVALID; + } + + // This is one tricky way to represent that the inner data is not owned by tensor itself. + impl->SetAllocator(nullptr); + impl->SetData(data, false); + return OH_AI_STATUS_SUCCESS; +} + const void *OH_AI_TensorGetData(const OH_AI_TensorHandle tensor) { if (tensor == nullptr) { MS_LOG(ERROR) << "param is nullptr."; return nullptr; } - auto impl = static_cast(tensor); + auto impl = static_cast(tensor); return impl->Data().get(); } @@ -171,7 +187,7 @@ void *OH_AI_TensorGetMutableData(const OH_AI_TensorHandle tensor) { MS_LOG(ERROR) << "param is nullptr."; return nullptr; } - auto impl = static_cast(tensor); + auto impl = static_cast(tensor); return impl->MutableData(); } @@ -180,7 +196,7 @@ int64_t OH_AI_TensorGetElementNum(const OH_AI_TensorHandle tensor) { MS_LOG(ERROR) << "param is nullptr."; return 0; } - auto impl = static_cast(tensor); + auto impl = static_cast(tensor); return impl->ElementNum(); } @@ -189,6 +205,6 @@ size_t OH_AI_TensorGetDataSize(const OH_AI_TensorHandle tensor) { MS_LOG(ERROR) << "param is nullptr."; return 0; } - auto impl = static_cast(tensor); + auto impl = static_cast(tensor); return impl->DataSize(); } diff --git a/mindspore/lite/src/litert/c_api/type_c_private.h b/mindspore/lite/src/litert/c_api/type_c_private.h new file mode 100644 index 00000000..2d3b3883 --- /dev/null +++ b/mindspore/lite/src/litert/c_api/type_c_private.h @@ -0,0 +1,40 @@ +/** + * Copyright 2023 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_LITE_SRC_LITERT_C_API_TYPE_C_PRIVATE_H_ +#define MINDSPORE_LITE_SRC_LITERT_C_API_TYPE_C_PRIVATE_H_ + +#include +#include +#include +#include +#include "include/c_api/types_c.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define NNRT_DEVICE_NAME_MAX (128) + +struct NNRTDeviceDesc { + size_t device_id; + OH_AI_NNRTDeviceType device_type; + char device_name[NNRT_DEVICE_NAME_MAX]; +}; + +#ifdef __cplusplus +} +#endif +#endif // MINDSPORE_LITE_SRC_LITERT_C_API_TYPE_C_PRIVATE_H_ diff --git a/mindspore/lite/src/litert/cxx_api/context.cc b/mindspore/lite/src/litert/cxx_api/context.cc index 1371bcf0..e5f19d28 100644 --- a/mindspore/lite/src/litert/cxx_api/context.cc +++ b/mindspore/lite/src/litert/cxx_api/context.cc @@ -50,6 +50,11 @@ constexpr auto kModelOptionAscendDynamicBatchSize = "mindspore.option.ascend.dyn constexpr auto kModelOptionAscendDynamicImageSize = "mindspore.option.ascend.dynamic_image_size"; constexpr auto kModelOptionAscendBufferOptimize = "mindspore.option.ascend.buffer_optimize"; constexpr auto kModelOptionAscendRankID = "mindspore.option.ascend.rank_id"; +constexpr auto kModelOptionNNRTDeviceID = "mindspore.option.nnrt.device_id"; +constexpr auto kModelOptionNNRTPerformanceMode = "mindspore.option.nnrt.performance_mode"; +constexpr auto kModelOptionNNRTPriority = "mindspore.option.nnrt.priority"; +constexpr auto kModelOptionNNRTEnableFP16 = "mindspore.option.nnrt.enable_fp16"; +constexpr auto kModelOptionNNRTExtensions = "mindspore.option.nnrt.extensions"; #ifdef USE_GLOG extern "C" { extern void mindspore_log_init(); @@ -684,4 +689,84 @@ std::vector AscendDeviceInfo::GetBufferOptimizeModeChar() const { const std::string &ref = GetValue(data_, kModelOptionAscendBufferOptimize); return StringToChar(ref); } + +void NNRTDeviceInfo::SetDeviceID(size_t device_id) { + if (data_ == nullptr) { + MS_LOG(ERROR) << "Invalid context."; + return; + } + data_->params[kModelOptionNNRTDeviceID] = device_id; +} + +size_t NNRTDeviceInfo::GetDeviceID() const { + if (data_ == nullptr) { + MS_LOG(ERROR) << "Invalid context."; + return 0; + } + return GetValue(data_, kModelOptionNNRTDeviceID); +} + +void NNRTDeviceInfo::SetPerformanceMode(int performance_mode) { + if (data_ == nullptr) { + MS_LOG(ERROR) << "Invalid context."; + return; + } + data_->params[kModelOptionNNRTPerformanceMode] = performance_mode; +} + +int NNRTDeviceInfo::GetPerformanceMode() const { + if (data_ == nullptr) { + MS_LOG(ERROR) << "Invalid context."; + return 0; + } + return GetValue(data_, kModelOptionNNRTPerformanceMode); +} + +void NNRTDeviceInfo::SetPriority(int priority) { + if (data_ == nullptr) { + MS_LOG(ERROR) << "Invalid context."; + return; + } + data_->params[kModelOptionNNRTPriority] = priority; +} + +int NNRTDeviceInfo::GetPriority() const { + if (data_ == nullptr) { + MS_LOG(ERROR) << "Invalid context."; + return 0; + } + return GetValue(data_, kModelOptionNNRTPriority); +} + +void NNRTDeviceInfo::SetEnableFP16(bool is_fp16) { + if (data_ == nullptr) { + MS_LOG(ERROR) << "Invalid context."; + return; + } + data_->params[kModelOptionNNRTEnableFP16] = is_fp16; +} + +bool NNRTDeviceInfo::GetEnableFP16() const { + if (data_ == nullptr) { + MS_LOG(ERROR) << "Invalid context."; + return false; + } + return GetValue(data_, kModelOptionNNRTEnableFP16); +} + +void NNRTDeviceInfo::SetExtensions(const std::vector &extensions) { + if (data_ == nullptr) { + MS_LOG(ERROR) << "Invalid context."; + return; + } + data_->params[kModelOptionNNRTExtensions] = extensions; +} + +std::vector 
NNRTDeviceInfo::GetExtensions() const { + if (data_ == nullptr) { + MS_LOG(ERROR) << "Invalid context."; + return {}; + } + return GetValue>(data_, kModelOptionNNRTExtensions); +} } // namespace mindspore diff --git a/mindspore/lite/src/litert/cxx_api/converters.cc b/mindspore/lite/src/litert/cxx_api/converters.cc index 0ff345cc..e54a36ee 100644 --- a/mindspore/lite/src/litert/cxx_api/converters.cc +++ b/mindspore/lite/src/litert/cxx_api/converters.cc @@ -86,6 +86,23 @@ Status ContextUtils::AddCustomDevice(lite::InnerContext *inner_context, return kSuccess; } +Status ContextUtils::AddNNRtDevice(lite::InnerContext *inner_context, size_t device_id, int performance_mode, + int priority, bool enable_fp16, const std::vector &extensions) { + lite::DeviceInfo device_info = {0}; + device_info.nnrt_device_info_.device_id_ = device_id; + device_info.nnrt_device_info_.performance_mode_ = performance_mode; + device_info.nnrt_device_info_.priority_ = priority; + device_info.nnrt_device_info_.enable_fp16_ = enable_fp16; + for (auto src_extension: extensions) { + lite::Extension dest_extension; + dest_extension.name = src_extension.name; + dest_extension.value = src_extension.value; + device_info.nnrt_device_info_.extensions_.push_back(dest_extension); + } + inner_context->device_list_.push_back({lite::DT_NNRT, device_info}); + return kSuccess; +} + void ContextUtils::ResetContextDefaultParam(Context *context) { if (context->GetInterOpParallelNum() == 0) { context->SetInterOpParallelNum(kDefaultInterOpParallelNum); @@ -163,44 +180,11 @@ std::shared_ptr ContextUtils::Convert(Context *context) { ret = AddAscendDevice(inner_context.get(), device.get()); } else if (device->GetDeviceType() == kCustomDevice) { ret = AddCustomDevice(inner_context.get(), device); - } - if (ret != kSuccess) { - MS_LOG(ERROR) << "Add device failed!"; - return nullptr; - } - } - return inner_context; -} - -std::shared_ptr ContextUtils::Convert(const ContextC *context_c) { - auto inner_context = std::make_shared(); - if ((context_c == nullptr) || (inner_context == nullptr)) { - MS_LOG(ERROR) << "Invalid context pointers."; - return nullptr; - } - auto device_list = context_c->device_info_list; - if (device_list.size() == 0 || device_list.size() > kMaxNumOfDevices) { - MS_LOG(ERROR) << "Device num, support min: 1, max: " << kMaxNumOfDevices; - return nullptr; - } - SetContextAttr(context_c->thread_num, 1, context_c->enable_parallel, context_c->affinity_core_list, - context_c->delegate_mode, context_c->delegate, inner_context.get()); - inner_context->device_list_.clear(); - Status ret = kLiteError; - for (auto &device_info_c : device_list) { - MS_CHECK_TRUE_RET(device_info_c != nullptr, nullptr); - lite::DeviceInfo device_info = {{0}}; - if (device_info_c->device_type == OH_AI_DEVICETYPE_CPU) { - if (device_info_c->allocator == nullptr) { - device_info_c->allocator = Allocator::Create(); - } - ret = AddCpuDevice(device_info_c->allocator, context_c->affinity_mode, device_info_c->enable_fp16, - device_info_c->provider, device_info_c->provider_device, inner_context.get()); - } else if (device_info_c->device_type == OH_AI_DEVICETYPE_GPU) { - ret = AddGpuDevice(device_info_c->enable_fp16, 0, 0, 0, false, nullptr, nullptr, device_info_c->provider, - device_info_c->provider_device, device_info_c->allocator, inner_context.get()); - } else if (device_info_c->device_type == OH_AI_DEVICETYPE_KIRIN_NPU) { - ret = AddNpuDevice(device_info_c->enable_fp16, device_info_c->frequency, inner_context.get()); + } else if (device->GetDeviceType() == kNNRt) 
{ + auto nnrt_device_info = device->Cast(); + ret = AddNNRtDevice(inner_context.get(), nnrt_device_info->GetDeviceID(), + nnrt_device_info->GetPerformanceMode(), nnrt_device_info->GetPriority(), + nnrt_device_info->GetEnableFP16(), nnrt_device_info->GetExtensions()); } if (ret != kSuccess) { MS_LOG(ERROR) << "Add device failed!"; diff --git a/mindspore/lite/src/litert/cxx_api/converters.h b/mindspore/lite/src/litert/cxx_api/converters.h index 0c043fc3..1af7c7df 100644 --- a/mindspore/lite/src/litert/cxx_api/converters.h +++ b/mindspore/lite/src/litert/cxx_api/converters.h @@ -24,14 +24,12 @@ #include "include/api/cfg.h" #include "include/train/train_cfg.h" #include "src/litert/inner_context.h" -#include "src/litert/c_api/context_c.h" #include "src/common/log_adapter.h" namespace mindspore { class MS_API ContextUtils { public: static std::shared_ptr Convert(Context *context); - static std::shared_ptr Convert(const ContextC *context_c); private: static void SetContextAttr(int32_t thread_num, int32_t inter_op_parallel_num, bool enable_parallel, @@ -48,6 +46,8 @@ class MS_API ContextUtils { static Status AddNpuDevice(bool enable_fp16, int frequency, lite::InnerContext *inner_context); static Status AddAscendDevice(lite::InnerContext *inner_context, DeviceInfoContext *device); static Status AddCustomDevice(lite::InnerContext *inner_context, const std::shared_ptr &device); + static Status AddNNRtDevice(lite::InnerContext *inner_context, size_t device_id, int performance_mode, int priority, + bool enable_fp16, const std::vector &extensions); static bool IsAffinityModeValid(int affinity_mode) { return affinity_mode >= lite::NO_BIND && affinity_mode <= lite::MID_CPU; } diff --git a/mindspore/lite/src/litert/delegate/nnrt/CMakeLists.txt b/mindspore/lite/src/litert/delegate/nnrt/CMakeLists.txt index 70aa63f3..625459e2 100644 --- a/mindspore/lite/src/litert/delegate/nnrt/CMakeLists.txt +++ b/mindspore/lite/src/litert/delegate/nnrt/CMakeLists.txt @@ -1,30 +1,13 @@ include_directories(${DDK_PATH}) include_directories($(CCSRC_DIR)/plugin/device/cpu/kernel) +include_directories(${CMAKE_SOURCE_DIR}/../../../../../../foundation/ai/neural_network_runtime/) include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include) -#include_directories(/home/tony/wty/workspace/ohos/third_party/mindspore/mindspore/lite/mindir/include/inner) -#include_directories(/home/tony/wty/workspace/ohos/third_party/mindspore/mindspore/lite/mindir/include) + file(GLOB_RECURSE NNRT_SRC ${CMAKE_CURRENT_SOURCE_DIR}/*.cc ) - -#add_library(hiai SHARED IMPORTED) -#set_target_properties(hiai PROPERTIES IMPORTED_LOCATION -# ${DDK_LIB_PATH}/libhiai.so) -#add_library(hiai_ir SHARED IMPORTED) -#set_target_properties(hiai_ir PROPERTIES IMPORTED_LOCATION -# ${DDK_LIB_PATH}/libhiai_ir.so) -#add_library(hiai_ir_build SHARED IMPORTED) -#set_target_properties(hiai_ir_build PROPERTIES IMPORTED_LOCATION -# ${DDK_LIB_PATH}/libhiai_ir_build.so) -#add_library(npu_kernel_mid OBJECT ${NPU_RUNTIME_SRC}) -#add_dependencies(npu_kernel_mid fbs_src) -#target_link_libraries( -# npu_kernel_mid -# hiai -# hiai_ir -# hiai_ir_build -#) - file(GLOB convert_source checker/*.cc) -add_library(nnr_mid OBJECT ${NNRT_SRC} ${convert_source} ) \ No newline at end of file + +add_library(nnrt_mid OBJECT ${NNRT_SRC} ${convert_source}) +target_include_directories(nnrt_mid PUBLIC ${CMAKE_SOURCE_DIR}/../../../../../../foundation/ai/neural_network_runtime/) \ No newline at end of file diff --git a/mindspore/lite/src/litert/delegate/nnrt/checker/primitive_check.cc 
b/mindspore/lite/src/litert/delegate/nnrt/checker/primitive_check.cc index 4df7e477..6b191c8e 100644 --- a/mindspore/lite/src/litert/delegate/nnrt/checker/primitive_check.cc +++ b/mindspore/lite/src/litert/delegate/nnrt/checker/primitive_check.cc @@ -109,6 +109,8 @@ Status CheckPrimitiveSupported(const schema::Primitive *primitive) { return mindspore::kSuccess; case schema::PrimitiveType_Unsqueeze: return mindspore::kSuccess; + case schema::PrimitiveType_Custom: + return mindspore::kSuccess; default: { MS_LOG(WARNING) << "No primitive type :" << (int)(type); return mindspore::kLiteSuccessExit; diff --git a/mindspore/lite/src/litert/delegate/nnrt/nnrt_delegate.cc b/mindspore/lite/src/litert/delegate/nnrt/nnrt_delegate.cc index 34897331..9f012e76 100644 --- a/mindspore/lite/src/litert/delegate/nnrt/nnrt_delegate.cc +++ b/mindspore/lite/src/litert/delegate/nnrt/nnrt_delegate.cc @@ -13,144 +13,637 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + +#include +#include #include "nnrt_delegate.h" #include "checker/primitive_check.h" #include "src/common/log_adapter.h" -#include "interfaces/kits/c/neural_network_runtime.h" +#include "interfaces/kits/c/neural_network_runtime/neural_network_runtime.h" #include "interfaces/innerkits/c/neural_network_runtime_inner.h" #include "nnrt_model_kernel.h" +#include "schema/model_generated.h" +#include "schema/ops_generated.h" +#include "flatbuffers/flatbuffers.h" +#include "litert/tensor_category.h" + +namespace mindspore { +namespace lite { +void NNRTDelegate::InitCachePath() { + static const std::string kCachePathName = "CachePath"; + static const std::string kCacheVersion = "CacheVersion"; + + const auto &extensions = nnrt_device_info_.extensions_; -mindspore::Status mindspore::NNRTDelegate::Build(DelegateModel *model) { - if (this->nnrt_lite_graph == nullptr) { - MS_LOG(ERROR) << "nnrt_lite_graph is nullptr."; - return mindspore::kLiteError; + auto iter_path = std::find_if(extensions.begin(), extensions.end(), [](const Extension &extension) { + return extension.name == kCachePathName; + }); + if (iter_path != extensions.end()) { + cache_path_ = std::string(iter_path->value.begin(), iter_path->value.end()); } - if (this->nnrt_lite_graph->sub_graphs_.empty()) { - // must have at lease one subgraph - MS_LOG(ERROR) << "must have at lease one subgraph"; - return mindspore::kLiteError; + + auto iter_version = std::find_if(extensions.begin(), extensions.end(), [](const Extension &extension) { + return extension.name == kCacheVersion; + }); + if (iter_version != extensions.end()) { + std::string version_str = std::string(iter_version->value.begin(), iter_version->value.end()); + cache_version_ = static_cast(std::atol(version_str.c_str())); } - OH_NN_ReturnCode ret_code; - OH_NNModel *oh_nnmodel = OH_NNModel_Construct(); - if (oh_nnmodel == nullptr) { - MS_LOG(ERROR) << "Construct NNModel failed, oh_nnmodel is nullptr."; - return mindspore::kLiteError; +} + +Status NNRTDelegate::Build(DelegateModel *model) { +#ifdef SUPPORT_NNRT_METAGRAPH + if (IsKirinNPU()) { + MS_LOG(DEBUG) << "Choose to build nnrt model with Metagraph"; + InitCachePath(); + return BuildKirinNPUModel(model); } +#endif - ret_code = OH_NNModel_BuildFromLiteGraph(oh_nnmodel, this->nnrt_lite_graph); - if (ret_code != OH_NN_SUCCESS) { - MS_LOG(ERROR) << "Build NNModel failed, OH_NN_ReturnCode = " << ret_code; - OH_NNModel_Destroy(&oh_nnmodel); - return mindspore::kLiteError; + return BuildNormalModel(model); +} + +bool 
NNRTDelegate::IsCustomModel() const { + // check if there is only one Custom kernel in LiteModel. + if (lite_graph_ == nullptr) { + return false; + } + if (lite_graph_->all_nodes_.size() != 1) { + return false; + } + auto node = lite_graph_->all_nodes_[0]; + if (node == nullptr) { + return false; + } + if (node->node_type_ != mindspore::schema::PrimitiveType_Custom) { + return false; + } + return true; +} + +#ifdef SUPPORT_NNRT_METAGRAPH +bool NNRTDelegate::IsKirinNPU() const { + const std::string kirin_npu_name_prefix = "NPU_"; + auto device_id = nnrt_device_info_.device_id_; + const char *device_name; + auto ret = OH_NNDevice_GetName(device_id, &device_name); + if (ret != OH_NN_SUCCESS) { + MS_LOG(WARNING) << "Get name of device: " << device_id << " failed, error: " << ret; + return false; + } + + if (strncmp(kirin_npu_name_prefix.c_str(), device_name, kirin_npu_name_prefix.size()) != 0) { + MS_LOG(WARNING) << "strncmp: " << device_id << " failed, device_name: " << device_name; + return false; + } + return true; +} + +Status NNRTDelegate::BuildKirinNPUModel(DelegateModel *model) { + OH_NNModel *nn_model = OH_NNModel_Construct(); + if (nn_model == nullptr) { + MS_LOG(ERROR) << "Create NNModel failed, result is nullptr"; + return kLiteNullptr; + } + + size_t extension_size = nnrt_device_info_.extensions_.size(); + std::vector extensions; + MS_LOG_DEBUG << "set extensions, item number: " << extension_size; + const size_t kExtensionNameMax = 128; // This is a length limitation in NNRT API. + for (size_t i = 0; i < extension_size; i++) { + auto &src_extension = nnrt_device_info_.extensions_[i]; + OH_NN_Extension dst_extension; + dst_extension.name[kExtensionNameMax - 1] = '\0'; + strncpy(dst_extension.name, src_extension.name.c_str(), kExtensionNameMax - 1); + dst_extension.value = (char *)((void *)src_extension.value.data()); + dst_extension.valueSize = src_extension.value.size(); + extensions.push_back(dst_extension); + MS_LOG_DEBUG << "set extension, item name: " << dst_extension.name << ", value size: " << dst_extension.valueSize; + } + + if (IsCustomModel()) { + auto ret = OH_NNModel_BuildFromLiteGraph(nn_model, lite_graph_); + if (ret != OH_NN_SUCCESS) { + MS_LOG(ERROR) << "Build NNModel failed, ret: " << ret; + OH_NNModel_Destroy(&nn_model); + return kLiteError; + } + } else { + SetKirinModelInputsAndOutputs(nn_model); + auto ret = OH_NNModel_BuildFromMetaGraph(nn_model, meta_graph_, extensions.data(), extensions.size()); + if (ret != OH_NN_SUCCESS) { + MS_LOG(ERROR) << "Build NNModel failed, ret: " << ret; + OH_NNModel_Destroy(&nn_model); + return kLiteError; + } + } + + auto ret2 = CreateFullModelKernel(model, nn_model); + if (ret2 != kSuccess) { + MS_LOG(ERROR) << "Create full model kernel failed, ret: " << ret2; + return kLiteError; } - MS_LOG(INFO) << "NNRTDelegate creates NNModel success."; + return kSuccess; +} + +std::vector NNRTDelegate::CreateNNTensorInfos(const std::vector &indices) const { + std::vector nn_tensor_infos; + for (auto index: indices) { + auto tensor = lite_graph_->all_tensors_[index]; + auto shape = tensor->dims(); + auto data_type = tensor->dataType(); + auto name = tensor->name(); + auto format = tensor->format(); - OH_NNCompilation *oh_nn_compilation = nullptr; - oh_nn_compilation = OH_NNCompilation_Construct(oh_nnmodel); + OH_NN_TensorInfo info; + info.dataType = CastToNNRTDataType(static_cast(data_type)); + info.dimensions = shape->data(); + info.dimensionCount = shape->size(); + strcpy(info.name, name->c_str()); + info.format = 
CastToNNRTFormat(static_cast(format)); + nn_tensor_infos.push_back(info); + } + return nn_tensor_infos; +} - if (oh_nn_compilation == nullptr) { +Status NNRTDelegate::SetKirinModelInputsAndOutputs(OH_NNModel *nn_model) { + std::vector inputInfos; + std::vector outputInfos; + auto input_infos = CreateNNTensorInfos(lite_graph_->input_indices_); + auto output_infos = CreateNNTensorInfos(lite_graph_->output_indices_); + OH_NNModel_SetInputsAndOutputsInfo(nn_model, input_infos.data(), input_infos.size(), output_infos.data(), + output_infos.size()); + return kSuccess; +} + +Status NNRTDelegate::CreateFullModelKernel(DelegateModel *model, OH_NNModel *nn_model) { + OH_NNCompilation *nn_compilation = OH_NNCompilation_Construct(nn_model); + if (nn_compilation == nullptr) { MS_LOG(ERROR) << "Construct NNCompilation failed"; - OH_NNModel_Destroy(&oh_nnmodel); - return mindspore::kLiteError; + OH_NNModel_Destroy(&nn_model); + return kLiteError; } - MS_LOG(INFO) << "NNRTDelegate creates NNCompilation success."; + MS_LOG(DEBUG) << "NNRTDelegate creates NNCompilation success."; - const size_t *allDevicesID = nullptr; - uint32_t device_count = 0; - ret_code = OH_NNDevice_GetAllDevicesID(&allDevicesID, &device_count); - if (ret_code != OH_NN_SUCCESS) { - MS_LOG(ERROR) << "NNModel GetAllDevicesID failed, OH_NN_ReturnCode = " << ret_code; - OH_NNCompilation_Destroy(&oh_nn_compilation); - OH_NNModel_Destroy(&oh_nnmodel); - return mindspore::kLiteError; + auto ret_code = InitNNCompilation(nn_compilation); + if (ret_code != kSuccess) { + MS_LOG(ERROR) << "Init NNCompilation failed"; + OH_NNModel_Destroy(&nn_model); + OH_NNCompilation_Destroy(&nn_compilation); + return kLiteError; } + OH_NNModel_Destroy(&nn_model); - if (device_count <= 0) { - MS_LOG(WARNING) << "No NNRt Device found, fall back to CPU. "; - // OH_NNCompilation_Destroy(&oh_nn_compilation); - // OH_NNModel_Destroy(&oh_nnmodel); - return mindspore::kSuccess; + OH_NNExecutor *nn_executor = nullptr; + nn_executor = OH_NNExecutor_Construct(nn_compilation); + if (nn_executor == nullptr) { + MS_LOG(ERROR) << "Construct NNExecutor failed, ret: " << ret_code; + OH_NNCompilation_Destroy(&nn_compilation); + return kLiteError; } - MS_LOG(INFO) << "NNRTDelegate GetAllDevicesID success."; + OH_NNCompilation_Destroy(&nn_compilation); - // check if model ops are supported - const bool *issupported = nullptr; + auto nnrt_model_kernel = new (std::nothrow)NNRTModelKernel(nn_executor, model->inputs(), model->outputs()); + if (nnrt_model_kernel == nullptr) { + OH_NNExecutor_Destroy(&nn_executor); + MS_LOG(ERROR) << "new NNRTModelKernel failed"; + return kLiteError; + } + model->Replace(model->BeginKernelIterator(), model->EndKernelIterator(), nnrt_model_kernel); + return kSuccess; +} +#endif + +Status NNRTDelegate::BuildNormalModel(DelegateModel *model) { + MS_LOG(DEBUG) << "Start to build NNRT model."; + if ((lite_graph_ == nullptr) || (lite_graph_->sub_graphs_.size() > 1)) { + MS_LOG(WARNING) << "LiteGraph contains more than one subgraph. 
NNRT does not support control-flow model yet, fallback to CPU"; + return kSuccess; + } + + OH_NNModel *full_model = CreateFullNNModel(); + if (full_model == nullptr) { + MS_LOG(WARNING) << "Build full NNModel failed, fallback to CPU"; + return kSuccess; + } + std::vector op_supports = QueryOpSupports(full_model); + if (op_supports.empty()) { + MS_LOG(WARNING) << "No supported ops found in the full model, fallback to CPU"; + OH_NNModel_Destroy(&full_model); + return kSuccess; + } + auto nnrt_subgraph_ranges = GetNNRTSubgraphRanges(model, op_supports); + MS_LOG(INFO) << "Found NNRT subgraph count: " << nnrt_subgraph_ranges.size(); + + std::vector sub_lite_graphs; + auto ret = CreateLiteGraphForNNRTSubgraph(nnrt_subgraph_ranges, &sub_lite_graphs); + if (ret != kSuccess) { + OH_NNModel_Destroy(&full_model); + MS_LOG(WARNING) << "Create NNRT sub LiteGraph failed, fallback to CPU"; + return kSuccess; + } + + std::vector nnrt_subgraph_kernels; + ret = CreateNNRTSubgraphKernels(model, sub_lite_graphs, nnrt_subgraph_ranges, &nnrt_subgraph_kernels); + if (ret != kSuccess) { + OH_NNModel_Destroy(&full_model); + MS_LOG(WARNING) << "Create NNRT subgraph kernel failed, fallback to CPU"; + return kSuccess; + } + + ReplaceNNRTKernelsInDelegateModel(model, nnrt_subgraph_ranges, nnrt_subgraph_kernels); + OH_NNModel_Destroy(&full_model); + MS_LOG(INFO) << "NNRTDelegate build success."; + return kSuccess; +} + +OH_NNModel *NNRTDelegate::CreateFullNNModel() { + if (lite_graph_ == nullptr) { + MS_LOG(ERROR) << "Lite graph is null"; + return nullptr; + } + + if (lite_graph_->sub_graphs_.empty()) { + MS_LOG(ERROR) << "Lite graph must have at least one subgraph"; + return nullptr; + } + + OH_NNModel *nn_model = OH_NNModel_Construct(); + if (nn_model == nullptr) { + MS_LOG(ERROR) << "Create NNModel failed, result is nullptr"; + return nullptr; + } + + auto ret = OH_NNModel_BuildFromLiteGraph(nn_model, lite_graph_); + if (ret != OH_NN_SUCCESS) { + MS_LOG(ERROR) << "Build NNModel failed, ret: " << ret; + OH_NNModel_Destroy(&nn_model); + return nullptr; + } + return nn_model; +} + +std::vector NNRTDelegate::QueryOpSupports(OH_NNModel *nn_model) { + const bool *is_supported = nullptr; // Note: this memory is owned by nn_model, do not free it separately. uint32_t op_count = 0; - ret_code = OH_NNModel_GetAvailableOperations(oh_nnmodel, allDevicesID[0], &issupported, &op_count); - if (ret_code != OH_NN_SUCCESS) { - MS_LOG(ERROR) << "NNModel GetAvailableOperations failed, OH_NN_ReturnCode = " << ret_code - << ", maybe due to dataParcel data length limitaion. Fall back to CPU."; - OH_NNCompilation_Destroy(&oh_nn_compilation); - OH_NNModel_Destroy(&oh_nnmodel); - return mindspore::kSuccess; + auto ret = OH_NNModel_GetAvailableOperations(nn_model, nnrt_device_info_.device_id_, &is_supported, &op_count); + if (ret != OH_NN_SUCCESS) { + MS_LOG(WARNING) << "NNModel GetAvailableOperations failed, ret: " << ret + << ", maybe caused by dataParcel data length limitation"; + return {}; } - uint32_t supported_op_count = 0; - for (uint32_t i = 0; i < op_count; i++) { - if (issupported[i]) { - supported_op_count++; + std::vector op_supports(is_supported, is_supported + op_count); + return op_supports; +} + +/* Find each continuous run of supported ops in op_supports. 
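+ * For example (illustrative only), op_supports = {true, true, false, true} yields the NNRT ranges [0, 2) and [3, 4).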
*/ +std::vector NNRTDelegate::GetNNRTSubgraphRanges(DelegateModel *model, + const std::vector &op_supports) { + std::vector nnrt_subgraph_ranges; + NNRTOpRange op_range; + bool start_count = false; + for (size_t i = 0; i < op_supports.size(); i++) { + if (op_supports[i]) { + if (start_count == false) { + start_count = true; + op_range.begin_index_ = i; + op_range.begin_iter_ = model->BeginKernelIterator() + i; + } + } else { + if (start_count == true) { + start_count = false; + op_range.end_index_ = i; + op_range.end_iter_ = model->BeginKernelIterator() + i; + nnrt_subgraph_ranges.push_back(op_range); + } } } - if (op_count != supported_op_count) { - MS_LOG(WARNING) << "this model has " << op_count << "ops, but NNRT only support " << supported_op_count - << " ops, fall back to CPU."; - // must support all op, else fall back to CPU - OH_NNCompilation_Destroy(&oh_nn_compilation); - OH_NNModel_Destroy(&oh_nnmodel); - return mindspore::kSuccess; + // Handle the trailing run of supported ops, if any. + if (start_count == true) { + op_range.end_index_ = op_supports.size(); + op_range.end_iter_ = model->EndKernelIterator(); + nnrt_subgraph_ranges.push_back(op_range); + MS_LOG(INFO) << "Schedule NNRT subgraph range: [" << op_range.begin_index_ << ", " << op_range.end_index_ << ")"; } - MS_LOG(INFO) << "NNRtDelegate supports all op in this model."; + return nnrt_subgraph_ranges; +} + +/** + * This method ONLY works when the following pre-conditions are satisfied: + * 1. The node order of lite_graph_->all_nodes should be consistent with DelegateModel sequence. + * This ensures the kernel replacement in DelegateModel based on the re-organizing info from lite_graph_ is correct. + * 2. The node indices of lite_graph_->sub_graphs[0].node_indices should be monotonically increasing from 0 to size - 1. 
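+ * Under these pre-conditions, the i-th node inside an NNRT op range corresponds to the kernel at begin_iter_ + i, which CreateNNRTSubgraphKernels relies on when mapping subgraph input/output tensors back to kernel tensors.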
+ */ +Status NNRTDelegate::CreateLiteGraphForNNRTSubgraph( + const std::vector &nnrt_op_ranges, + std::vector *sub_lite_graphs) { + MS_LOG(INFO) << "Start creating LiteGraph for NNRT subgraph"; + for (const auto &op_range: nnrt_op_ranges) { + MS_LOG(INFO) << "Process op range: [" << op_range.begin_index_ << ", " << op_range.end_index_ << ")"; + LiteGraph *sub_lite_graph = new (std::nothrow)LiteGraph; + if (sub_lite_graph == nullptr) { + MS_LOG(ERROR) << "Allocate LiteGraph failed"; + return kLiteError; + } + sub_lite_graph->name_ = lite_graph_->name_; + sub_lite_graph->version_ = lite_graph_->version_; - ret_code = OH_NNCompilation_SetDevice(oh_nn_compilation, allDevicesID[0]); + auto sub_graph = new (std::nothrow)LiteGraph::SubGraph; + if (sub_graph == nullptr) { + MS_LOG(ERROR) << "Allocate SubGraph failed"; + return kLiteError; + } + sub_graph->name_ = lite_graph_->name_; + sub_lite_graph->sub_graphs_.push_back(sub_graph); + // deal with all_nodes + MS_LOG(INFO) << "Assemble all_nodes..."; + int new_node_index = 0; + std::map in_tensor_index_map; + std::map out_tensor_index_map; + for (size_t index = op_range.begin_index_; index < op_range.end_index_; index++) { + LiteGraph::Node *node = new (std::nothrow)LiteGraph::Node; + if (node == nullptr) { + MS_LOG(ERROR) << "Allocate Node failed"; + return kLiteError; + } + *node = *lite_graph_->all_nodes_[index]; + sub_lite_graph->all_nodes_.push_back(node); + sub_graph->node_indices_.push_back(new_node_index++); + + for (auto i: node->input_indices_) { + in_tensor_index_map.emplace(i, lite_graph_->all_tensors_[i]); + } + for (auto i: node->output_indices_) { + out_tensor_index_map.emplace(i, lite_graph_->all_tensors_[i]); + } + } + + // deal with all_tensors + MS_LOG(INFO) << "Assemble all_tensors..."; + std::set tensors; + for (auto iter: in_tensor_index_map) { + tensors.emplace(iter.second); + } + for (auto iter: out_tensor_index_map) { + tensors.emplace(iter.second); + } + + uint32_t new_index = 0; + std::map new_tensor_maps; + for (auto tensor: tensors) { + new_tensor_maps.emplace(tensor, new_index++); + } + + sub_lite_graph->all_tensors_ = std::vector(tensors.begin(), tensors.end()); + + // deal with every node's input/output indices + MS_LOG(INFO) << "Set input/output indices of each node..."; + for (auto node: sub_lite_graph->all_nodes_) { + for (auto &index : node->input_indices_) { + index = new_tensor_maps.at(in_tensor_index_map.at(index)); + } + for (auto &index : node->output_indices_) { + index = new_tensor_maps.at(out_tensor_index_map.at(index)); + } + } + + // deal with subgraph's input/output indices + MS_LOG(INFO) << "Set input/output indices of each subgraph..."; + sub_graph->tensor_indices_ = std::vector(tensors.size()); + std::iota(sub_graph->tensor_indices_.begin(), sub_graph->tensor_indices_.end(), 0U); + + for (auto iter: in_tensor_index_map) { + auto new_tensor_index = new_tensor_maps[iter.second]; + MS_LOG(DEBUG) << "handle input: old: " << iter.first << ", new: " << new_tensor_index << std::endl; + if (IsConstTensor(*iter.second)) { + MS_LOG(DEBUG) << "- tensor: " << new_tensor_index << " is const." << std::endl; + continue; + } + + bool is_subgraph_input = true; + for (auto node: sub_lite_graph->all_nodes_) { + if (std::find(node->output_indices_.begin(), node->output_indices_.end(), new_tensor_index) != + node->output_indices_.end()) { + is_subgraph_input = false; + MS_LOG(DEBUG) << "- tensor: " << new_tensor_index << " is not subgraph input." 
<< std::endl; + break; + } + } + if (is_subgraph_input) { + sub_graph->input_indices_.push_back(new_tensor_index); + MS_LOG(DEBUG) << "- select tensor: " << new_tensor_index << " as subgraph input." << std::endl; + } + } + + for (auto iter: out_tensor_index_map) { + int new_tensor_index = new_tensor_maps.at(iter.second); + MS_LOG(DEBUG) << "handle output: old: " << iter.first << ", new: " << new_tensor_index << std::endl; + if (IsConstTensor(*iter.second)) { + MS_LOG(DEBUG) << "- tensor: " << new_tensor_index << " is const." << std::endl; + continue; + } + + bool is_subgraph_output = false; + for (size_t i = 0; i < lite_graph_->all_nodes_.size(); i++) { + if ((i >= op_range.begin_index_) && (i < op_range.end_index_)) { + continue; + } + auto node = lite_graph_->all_nodes_[i]; + if (std::find(node->input_indices_.begin(), node->input_indices_.end(), iter.first) != + node->input_indices_.end()) { // As the input of node which does not belong to the subgraph. + is_subgraph_output = true; + MS_LOG(DEBUG) << "- tensor: " << new_tensor_index << " is original subgraph output. node: " << node->primitive_ << std::endl; + break; + } + } + bool is_graph_output = (std::find(lite_graph_->output_indices_.begin(),lite_graph_->output_indices_.end(), + iter.first) != lite_graph_->output_indices_.end()); + if (is_graph_output) { + MS_LOG(DEBUG) << "- tensor: " << new_tensor_index << " is graph output." << std::endl; + } + if (is_subgraph_output || is_graph_output) { + sub_graph->output_indices_.push_back(new_tensor_index); + MS_LOG(DEBUG) << "- select tensor: " << new_tensor_index << " as subgraph output." << std::endl; + } + } + + // deal with full-graph's input/output indices + sub_lite_graph->input_indices_ = sub_graph->input_indices_; + sub_lite_graph->output_indices_ = sub_graph->output_indices_; + sub_lite_graphs->push_back(sub_lite_graph); + } + MS_LOG(INFO) << "Finished creating LiteGraph for NNRT subgraph"; + return kSuccess; +} + +struct TensorLocation { + uint32_t node_index; // the index of node which the tensor belongs to. + uint32_t tensor_index; // the index of node in/out tensors which the tensor is located at. 
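+  // For example, a tensor that is the 2nd input of node 5 is located by {node_index = 5, tensor_index = 1}.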
+}; + +Status NNRTDelegate::InitNNCompilation(OH_NNCompilation *nn_compilation) const { + auto ret_code = OH_NNCompilation_SetDevice(nn_compilation, nnrt_device_info_.device_id_); if (ret_code != OH_NN_SUCCESS) { - MS_LOG(ERROR) << "NNCompilation SetDevice failed, OH_NN_ReturnCode = " << ret_code; - OH_NNCompilation_Destroy(&oh_nn_compilation); - OH_NNModel_Destroy(&oh_nnmodel); - return mindspore::kLiteError; + MS_LOG(ERROR) << "NNCompilation set device id failed, ret: " << ret_code; + return kLiteError; + } + ret_code = OH_NNCompilation_SetPerformanceMode(nn_compilation, + (OH_NN_PerformanceMode)(nnrt_device_info_.performance_mode_)); + if ((ret_code != OH_NN_SUCCESS) && (ret_code != OH_NN_OPERATION_FORBIDDEN)) { + MS_LOG(ERROR) << "NNCompilation set performance mode failed, ret: " << ret_code; + return kLiteError; + } + ret_code = OH_NNCompilation_SetPriority(nn_compilation, (OH_NN_Priority)(nnrt_device_info_.priority_)); + if ((ret_code != OH_NN_SUCCESS) && (ret_code != OH_NN_OPERATION_FORBIDDEN)) { + MS_LOG(ERROR) << "NNCompilation set priority failed, ret: " << ret_code; + return kLiteError; + } + ret_code = OH_NNCompilation_EnableFloat16(nn_compilation, nnrt_device_info_.enable_fp16_); + if ((ret_code != OH_NN_SUCCESS) && (ret_code != OH_NN_OPERATION_FORBIDDEN)) { + MS_LOG(ERROR) << "NNCompilation enable fp16 failed, ret: " << ret_code; + return kLiteError; } - ret_code = OH_NNCompilation_Build(oh_nn_compilation); + if (!cache_path_.empty()) { // Set cache path if user indeed set it. + ret_code = OH_NNCompilation_SetCache(nn_compilation, cache_path_.c_str(), cache_version_); + if ((ret_code != OH_NN_SUCCESS) && (ret_code != OH_NN_OPERATION_FORBIDDEN)) { + MS_LOG(ERROR) << "NNCompilation set cache failed, ret: " << ret_code; + return kLiteError; + } + } + ret_code = OH_NNCompilation_Build(nn_compilation); if (ret_code != OH_NN_SUCCESS) { - MS_LOG(ERROR) << "Build NNCompilation failed, OH_NN_ReturnCode = " << ret_code; - OH_NNCompilation_Destroy(&oh_nn_compilation); - OH_NNModel_Destroy(&oh_nnmodel); - return mindspore::kLiteError; - } - - MS_LOG(DEBUG) << "NNRTDelegate SetDevice success."; - - OH_NNExecutor *oh_nn_executor = nullptr; - oh_nn_executor = OH_NNExecutor_Construct(oh_nn_compilation); - if (oh_nn_executor == nullptr) { - MS_LOG(ERROR) << "Construct NNCompilation SetDevice failed, OH_NN_ReturnCode = " << ret_code; - OH_NNCompilation_Destroy(&oh_nn_compilation); - OH_NNModel_Destroy(&oh_nnmodel); - return mindspore::kLiteError; - } - MS_LOG(DEBUG) << "NNRTDelegate creates NNExecutor success."; - mindspore::Status prepare_data_ret; - auto nnr_model_kernel = new (std::nothrow) NNRTModelKernel(oh_nn_executor, model->inputs(), model->outputs()); - if (nnr_model_kernel == nullptr) { - MS_LOG(ERROR) << "new NNRTModelKernel failed"; - return mindspore::kLiteError; + MS_LOG(ERROR) << "Build NNCompilation failed, ret: " << ret_code; + return kLiteError; } - OH_NNCompilation_Destroy(&oh_nn_compilation); - OH_NNModel_Destroy(&oh_nnmodel); - KernelIter from = model->BeginKernelIterator(); - KernelIter end = model->EndKernelIterator(); - model->Replace(from, end, nnr_model_kernel); + return kSuccess; +} + +Status NNRTDelegate::CreateNNRTSubgraphKernels(DelegateModel *model, + const std::vector &sub_lite_graphs, const std::vector &nnrt_subgraph_ranges, + std::vector *nnrt_subgraph_kernels) { + for (size_t i = 0; i < sub_lite_graphs.size(); i++) { + auto sub_lite_graph = sub_lite_graphs[i]; + + OH_NNModel *nn_model = OH_NNModel_Construct(); + auto ret = 
OH_NNModel_BuildFromLiteGraph(nn_model, sub_lite_graph); + if (ret != OH_NN_SUCCESS) { + MS_LOG(ERROR) << "Build NNModel failed, ret: " << ret; + OH_NNModel_Destroy(&nn_model); + return kLiteError; + } - MS_LOG(INFO) << "NNRTDelegate build success."; - return mindspore::kSuccess; + OH_NNCompilation *nn_compilation = OH_NNCompilation_Construct(nn_model); + if (nn_compilation == nullptr) { + MS_LOG(ERROR) << "Construct NNCompilation failed"; + OH_NNModel_Destroy(&nn_model); + return kLiteError; + } + MS_LOG(DEBUG) << "NNRTDelegate creates NNCompilation success."; + + auto ret_code = InitNNCompilation(nn_compilation); + if (ret_code != kSuccess) { + MS_LOG(ERROR) << "Init NNCompilation failed"; + OH_NNCompilation_Destroy(&nn_compilation); + OH_NNModel_Destroy(&nn_model); + return kLiteError; + } + + OH_NNExecutor *nn_executor = nullptr; + nn_executor = OH_NNExecutor_Construct(nn_compilation); + if (nn_executor == nullptr) { + MS_LOG(ERROR) << "Construct NNExecutor failed, ret: " << ret_code; + OH_NNCompilation_Destroy(&nn_compilation); + OH_NNModel_Destroy(&nn_model); + return kLiteError; + } + MS_LOG(DEBUG) << "NNRTDelegate creates NNExecutor success."; + + bool format_not_support = false; + std::vector in_tensors; + for (auto index: sub_lite_graph->sub_graphs_[0]->input_indices_) { + TensorLocation location; + for (auto node_index: sub_lite_graph->sub_graphs_[0]->node_indices_) { + auto node = sub_lite_graph->all_nodes_[node_index]; + auto iter = std::find(node->input_indices_.begin(), node->input_indices_.end(), index); + if (iter != node->input_indices_.end()) { + uint32_t tensor_index = iter - node->input_indices_.begin(); + location.node_index = node_index; + location.tensor_index = tensor_index; + MS_LOG(INFO) << "Found graph input index: " << index << " is the " << tensor_index << "th input of the node " << node->primitive_; + break; + } + } + KernelIter kernel_iter = nnrt_subgraph_ranges[i].begin_iter_ + location.node_index; + in_tensors.push_back((*kernel_iter)->inputs()[location.tensor_index]); + if (in_tensors.back().format() != Format::NHWC) { + format_not_support = true; + break ; + } + } + + std::vector out_tensors; + for (auto index: sub_lite_graph->sub_graphs_[0]->output_indices_) { + TensorLocation location; + for (auto node_index: sub_lite_graph->sub_graphs_[0]->node_indices_) { + auto node = sub_lite_graph->all_nodes_[node_index]; + auto iter = std::find(node->output_indices_.begin(), node->output_indices_.end(), index); + if (iter != node->output_indices_.end()) { + uint32_t tensor_index = iter - node->output_indices_.begin(); + location.node_index = node_index; + location.tensor_index = tensor_index; + MS_LOG(INFO) << "Found graph output index: " << index << " is the " << tensor_index << "th output of the node " << node->primitive_; + break; + } + } + KernelIter kernel_iter = nnrt_subgraph_ranges[i].begin_iter_ + location.node_index; + out_tensors.push_back((*kernel_iter)->outputs()[location.tensor_index]); + if (out_tensors.back().format() != Format::NHWC) { + format_not_support = true; + break ; + } + } + if (format_not_support) { + MS_LOG(WARNING) << "Not support in/out tensor format, skip this subgraph"; + OH_NNCompilation_Destroy(&nn_compilation); + OH_NNModel_Destroy(&nn_model); + nnrt_subgraph_kernels->push_back(nullptr); + continue ; + } + + auto nnrt_model_kernel = new (std::nothrow)NNRTModelKernel(nn_executor, in_tensors, out_tensors); + if (nnrt_model_kernel == nullptr) { + MS_LOG(ERROR) << "new NNRTModelKernel failed"; + return kLiteError; + } + 
OH_NNCompilation_Destroy(&nn_compilation); + OH_NNModel_Destroy(&nn_model); + nnrt_subgraph_kernels->push_back(nnrt_model_kernel); + } + return kSuccess; } -mindspore::Status mindspore::NNRTDelegate::Init() { - MS_LOG(DEBUG) << "NNRTDelegate init success."; - return mindspore::kSuccess; +void NNRTDelegate::ReplaceNNRTKernelsInDelegateModel(DelegateModel *model, + const std::vector &nnrt_subgraph_ranges, + const std::vector &nnrt_subgraph_kernels) { + // Here we perform the replacement from back to front intentionally! If replace from front to end, the kernel + // sequence would shrink and the later begin_iter_/end_iter_ may be erased already. + for (int i = nnrt_subgraph_ranges.size() - 1; i >= 0; i--) { + if (nnrt_subgraph_kernels[i] == nullptr) { + continue; + } + auto from = nnrt_subgraph_ranges[i].begin_iter_; + auto end = nnrt_subgraph_ranges[i].end_iter_; + (void)model->Replace(from, end, nnrt_subgraph_kernels[i]); + MS_LOG(INFO) << "Replace nnrt subgraph kernel in range: [" << (from - model->BeginKernelIterator()) + << ", " << (end - model->BeginKernelIterator()) << ")"; + } } -mindspore::Status mindspore::NNRTDelegate::PrepareInputs(DelegateModel *model, - OH_NNExecutor *oh_nn_executor) { + +Status NNRTDelegate::PrepareInputs(DelegateModel *model, + OH_NNExecutor *oh_nn_executor) { auto input_tensors = model->inputs(); for (size_t i = 0; i < input_tensors.size(); i++) { auto tensor = input_tensors[i]; @@ -161,10 +654,10 @@ mindspore::Status mindspore::NNRTDelegate::PrepareInputs(DelegateModel scale; std::vector zero_point; if (!tmp_quant_param.empty()) { - quant_param = new (std::nothrow) OH_NN_QuantParam; + quant_param = new(std::nothrow) OH_NN_QuantParam; if (quant_param == nullptr) { MS_LOG(ERROR) << "new OH_NN_QuantParam failed."; - return mindspore::kLiteError; + return kLiteError; } for (auto qparam : tmp_quant_param) { bit_num.emplace_back(qparam.bit_num); @@ -176,12 +669,12 @@ mindspore::Status mindspore::NNRTDelegate::PrepareInputs(DelegateModelscale = scale.data(); quant_param->zeroPoint = zero_point.data(); } - auto oprend = new (std::nothrow) OH_NN_Tensor; + auto oprend = new(std::nothrow) OH_NN_Tensor; if (oprend == nullptr) { MS_LOG(ERROR) << "new OH_NN_Tensor Failed"; - return mindspore::kLiteError; + return kLiteError; } - oprend->dataType = ConvertDataType(tensor.DataType()); + oprend->dataType = CastToNNRTDataType(tensor.DataType()); oprend->dimensionCount = tensor_shape.size(); std::vector dimensions_list; @@ -191,14 +684,14 @@ mindspore::Status mindspore::NNRTDelegate::PrepareInputs(DelegateModel subgraph_list; for (auto subgraph : lite_graph.sub_graphs_) { - auto new_subgraph = new (std::nothrow) LiteGraph::SubGraph; + auto new_subgraph = new(std::nothrow) LiteGraph::SubGraph; if (new_subgraph == nullptr) { MS_LOG(ERROR) << "new LiteGraph::Subgraph failed."; return; @@ -331,30 +795,32 @@ void mindspore::NNRTDelegate::ShallowCopyLiteGraph(const mindspore::lite::LiteGr } for (auto tensor : lite_graph.all_tensors_) { ret = lite::CheckTensorSupported(static_cast(tensor)); - if (ret == mindspore::kLiteError) { + if (ret == kLiteError) { MS_LOG(ERROR) << "tensor supported check failed."; return; } } - nnrt_lite_graph = new (std::nothrow) lite::LiteGraph(); - if (nnrt_lite_graph == nullptr) { + lite_graph_ = new(std::nothrow) lite::LiteGraph(); + if (lite_graph_ == nullptr) { MS_LOG(ERROR) << "new LiteGraph failed."; return; } - nnrt_lite_graph->name_ = lite_graph.name_; - nnrt_lite_graph->version_ = lite_graph.version_; - nnrt_lite_graph->input_indices_ = 
lite_graph.input_indices_; - nnrt_lite_graph->output_indices_ = lite_graph.output_indices_; - nnrt_lite_graph->all_tensors_ = lite_graph.all_tensors_; - nnrt_lite_graph->all_nodes_ = node_list; - nnrt_lite_graph->sub_graphs_ = subgraph_list; + lite_graph_->name_ = lite_graph.name_; + lite_graph_->version_ = lite_graph.version_; + lite_graph_->input_indices_ = lite_graph.input_indices_; + lite_graph_->output_indices_ = lite_graph.output_indices_; + lite_graph_->all_tensors_ = lite_graph.all_tensors_; + lite_graph_->all_nodes_ = node_list; + lite_graph_->sub_graphs_ = subgraph_list; MS_LOG(INFO) << "ShallowCopyLiteGraph success."; } -mindspore::NNRTDelegate::~NNRTDelegate() { - if (this->nnrt_lite_graph != nullptr) { +NNRTDelegate::~NNRTDelegate() { + if (lite_graph_ != nullptr) { MS_LOG(ERROR) << "Delete NNRTDelegate."; } -}; +} +} // namespace lite +} // namespace mindspore diff --git a/mindspore/lite/src/litert/delegate/nnrt/nnrt_delegate.h b/mindspore/lite/src/litert/delegate/nnrt/nnrt_delegate.h index c2847704..52626339 100644 --- a/mindspore/lite/src/litert/delegate/nnrt/nnrt_delegate.h +++ b/mindspore/lite/src/litert/delegate/nnrt/nnrt_delegate.h @@ -15,37 +15,81 @@ */ #ifndef MINDSPORE_NNR_DELEGATE_H #define MINDSPORE_NNR_DELEGATE_H + #include #include #include "include/api/delegate.h" #include "include/model.h" -#include "interfaces/kits/c/neural_network_runtime_type.h" -namespace mindspore { +#include "src/litert/inner_context.h" +#include "nnrt_model_kernel.h" +#include "schema/model_generated.h" +#include "interfaces/kits/c/neural_network_runtime/neural_network_runtime_type.h" +#include "interfaces/kits/c/neural_network_runtime/neural_network_runtime.h" +#include "interfaces/innerkits/c/neural_network_runtime_inner.h" -using namespace lite; +namespace mindspore { +namespace lite { +struct NNRTOpRange { + /* NNRT kernel range in DelegateModel: [begin_iter_, end_iter_) */ + KernelIter begin_iter_; + KernelIter end_iter_; + /* NNRT node range in lite_graph_: [begin_index_, end_index_) */ + size_t begin_index_; + size_t end_index_; +}; class NNRTDelegate : public Delegate { public: - NNRTDelegate() : Delegate(){}; - + NNRTDelegate() = default; + NNRTDelegate(const NNRtDeviceInfo &nnrt_device_info) : nnrt_device_info_(nnrt_device_info) {} ~NNRTDelegate() override; - - Status Init() override; - + Status Init() override { return kSuccess; } Status Build(DelegateModel *model) override; - void ShallowCopyLiteGraph(const lite::LiteGraph &liteGraph); - - protected: - LiteGraph *nnrt_lite_graph = nullptr; + void SetMetaGraph(const void *meta_graph) { + meta_graph_ = meta_graph; + } + static std::vector GetNNRTSubgraphRanges(DelegateModel *model, + const std::vector &op_supports); private: - // static LiteGraph* CreateLiteGraph(const LiteGraph &liteGraph); + void InitCachePath(); + Status BuildNormalModel(DelegateModel *model); + OH_NNModel *CreateFullNNModel(); + std::vector QueryOpSupports(OH_NNModel *nn_model); + Status CreateLiteGraphForNNRTSubgraph( + const std::vector &nnrt_op_ranges, + std::vector *sub_lite_graphs); + Status CreateNNRTSubgraphKernels( + DelegateModel *model, + const std::vector &sub_lite_graphs, + const std::vector &nnrt_subgraph_ranges, + std::vector *nnrt_subgraph_kernels); + void ReplaceNNRTKernelsInDelegateModel(DelegateModel *model, + const std::vector &nnrt_subgraph_ranges, + const std::vector &nnrt_subgraph_kernels); Status PrepareInputs(DelegateModel *model, OH_NNExecutor *oh_nn_executor); Status PrepareOutputs(DelegateModel *model, OH_NNExecutor 
*oh_nn_executor); - OH_NN_DataType ConvertDataType(mindspore::DataType data_type); -}; + Status InitNNCompilation(OH_NNCompilation *nn_compilation) const; + static OH_NN_DataType CastToNNRTDataType(mindspore::DataType data_type); + static OH_NN_Format CastToNNRTFormat(Format format); + bool IsCustomModel() const; + +#ifdef SUPPORT_NNRT_METAGRAPH + bool IsKirinNPU() const; + Status BuildKirinNPUModel(DelegateModel *model); + Status SetKirinModelInputsAndOutputs(OH_NNModel *nn_model); + std::vector CreateNNTensorInfos(const std::vector &indices) const; + Status CreateFullModelKernel(DelegateModel *model, OH_NNModel *nn_model); +#endif + NNRtDeviceInfo nnrt_device_info_; + LiteGraph *lite_graph_ = nullptr; + const void *meta_graph_ = nullptr; + std::string cache_path_ = ""; + uint32_t cache_version_ = 0; +}; +} // namespace lite } // namespace mindspore #endif // MINDSPORE_NNR_DELEGATE_H diff --git a/mindspore/lite/src/litert/delegate/nnrt/nnrt_model_kernel.cc b/mindspore/lite/src/litert/delegate/nnrt/nnrt_model_kernel.cc index 5acf2e9a..67443e08 100644 --- a/mindspore/lite/src/litert/delegate/nnrt/nnrt_model_kernel.cc +++ b/mindspore/lite/src/litert/delegate/nnrt/nnrt_model_kernel.cc @@ -97,7 +97,7 @@ OH_NN_DataType mindspore::NNRTModelKernel::ConvertDataType(mindspore::DataType d } int mindspore::NNRTModelKernel::PrepareInputs() { auto input_tensors = this->inputs(); - for (int i = 0; i < input_tensors.size(); i++) { + for (size_t i = 0; i < input_tensors.size(); i++) { auto tensor = input_tensors[i]; auto tensor_shape = tensor.Shape(); auto tmp_quant_param = tensor.QuantParams(); @@ -142,6 +142,7 @@ int mindspore::NNRTModelKernel::PrepareInputs() { oprend->dimensions = dimensions_list.data(); oprend->quantParam = quant_param; oprend->type = OH_NN_TENSOR; + MS_LOG_INFO << "input tensor: " << tensor.Name() << ", data: " << (void *)tensor.MutableData() << ", size: " << tensor.DataSize(); OH_NN_ReturnCode ret_code = OH_NNExecutor_SetInput(oh_nn_executor, i, oprend, tensor.MutableData(), tensor.DataSize()); delete (oprend); diff --git a/mindspore/lite/src/litert/delegate/nnrt/nnrt_model_kernel.h b/mindspore/lite/src/litert/delegate/nnrt/nnrt_model_kernel.h index cf9481df..ea15f7ca 100644 --- a/mindspore/lite/src/litert/delegate/nnrt/nnrt_model_kernel.h +++ b/mindspore/lite/src/litert/delegate/nnrt/nnrt_model_kernel.h @@ -20,7 +20,7 @@ #include #include #include "include/api/kernel.h" -#include "interfaces/kits/c/neural_network_runtime.h" +#include "interfaces/kits/c/neural_network_runtime/neural_network_runtime.h" #include "src/common/log_adapter.h" #include "include/errorcode.h" diff --git a/mindspore/lite/src/litert/delegate/nnrt/nnrt_stub.cc b/mindspore/lite/src/litert/delegate/nnrt/nnrt_stub.cc new file mode 100644 index 00000000..8ac283af --- /dev/null +++ b/mindspore/lite/src/litert/delegate/nnrt/nnrt_stub.cc @@ -0,0 +1,99 @@ +/** +* Copyright 2023 Huawei Technologies Co., Ltd +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*/ + +#include "interfaces/kits/c/neural_network_runtime/neural_network_runtime.h" +#include "interfaces/innerkits/c/neural_network_runtime_inner.h" + +OH_NNModel *OH_NNModel_Construct(void) { + return NULL; +} + +OH_NN_ReturnCode OH_NNExecutor_Run(OH_NNExecutor *executor) { + return OH_NN_SUCCESS; +} + +OH_NN_ReturnCode OH_NNCompilation_Build(OH_NNCompilation *compilation) { + return OH_NN_SUCCESS; +} + +void OH_NNCompilation_Destroy(OH_NNCompilation **compilation) {} + +OH_NNExecutor *OH_NNExecutor_Construct(OH_NNCompilation *compilation) { + return NULL; +} + +void OH_NNExecutor_Destroy(OH_NNExecutor **executor) {} + +OH_NNCompilation *OH_NNCompilation_Construct(const OH_NNModel *model) { + return NULL; +} + +OH_NN_ReturnCode OH_NNDevice_GetAllDevicesID(const size_t **allDevicesID, uint32_t *deviceCount) { + return OH_NN_SUCCESS; +} + +OH_NN_ReturnCode OH_NNExecutor_SetOutput(OH_NNExecutor *executor, + uint32_t outputIndex, + void *dataBuffer, + size_t length) { + return OH_NN_SUCCESS; +} + +OH_NN_ReturnCode OH_NNCompilation_SetDevice(OH_NNCompilation *compilation, size_t deviceID) { + return OH_NN_SUCCESS; +} + +OH_NN_ReturnCode OH_NNExecutor_SetInput(OH_NNExecutor *executor, + uint32_t inputIndex, + const OH_NN_Tensor *tensor, + const void *dataBuffer, + size_t length) { + return OH_NN_SUCCESS; +} + +void OH_NNModel_Destroy(OH_NNModel **model) {} + +OH_NN_ReturnCode OH_NNModel_GetAvailableOperations(OH_NNModel *model, + size_t deviceID, + const bool **isSupported, + uint32_t *opCount) { + return OH_NN_SUCCESS; +} + +OH_NN_ReturnCode OH_NNModel_BuildFromLiteGraph(OH_NNModel *model, const void *liteGraph) { + return OH_NN_SUCCESS; +} + +OH_NN_ReturnCode OH_NNDevice_GetName(size_t deviceID, const char **name) { + return OH_NN_SUCCESS; +} + +OH_NN_ReturnCode OH_NNDevice_GetType(size_t deviceID, OH_NN_DeviceType *deviceType) { + return OH_NN_SUCCESS; +} + +OH_NN_ReturnCode OH_NNCompilation_SetPriority(OH_NNCompilation *compilation, OH_NN_Priority priority) { + return OH_NN_SUCCESS; +} + +OH_NN_ReturnCode OH_NNCompilation_EnableFloat16(OH_NNCompilation *compilation, bool enableFloat16) { + return OH_NN_SUCCESS; +} + +OH_NN_ReturnCode OH_NNCompilation_SetPerformanceMode(OH_NNCompilation *compilation, + OH_NN_PerformanceMode performanceMode) { + return OH_NN_SUCCESS; +} \ No newline at end of file diff --git a/mindspore/lite/src/litert/infer_manager.cc b/mindspore/lite/src/litert/infer_manager.cc index 2b21d1ca..908ab122 100644 --- a/mindspore/lite/src/litert/infer_manager.cc +++ b/mindspore/lite/src/litert/infer_manager.cc @@ -162,7 +162,8 @@ int KernelInferShape(const std::vector &inputs, const std::vecto if (parameter->type_ == static_cast(schema::PrimitiveType_PartialFusion) || parameter->type_ == static_cast(schema::PrimitiveType_Switch) || parameter->type_ == static_cast(schema::PrimitiveType_Call) || - parameter->type_ == static_cast(schema::PrimitiveType_SwitchLayer)) { + parameter->type_ == static_cast(schema::PrimitiveType_SwitchLayer) || + parameter->type_ == static_cast(PrimType_Inner_ThirdPartyModel)) { MS_LOG(INFO) << "no need infer shape."; return RET_OK; } diff --git a/mindspore/lite/src/litert/inner_context.cc b/mindspore/lite/src/litert/inner_context.cc index 7cbac8f7..bf585ff0 100644 --- a/mindspore/lite/src/litert/inner_context.cc +++ b/mindspore/lite/src/litert/inner_context.cc @@ -122,6 +122,10 @@ int InnerContext::Init() { #endif } + if (IsDeviceTypeEnabled(DT_NNRT)) { + MS_LOG(DEBUG) << "NNRT enabled."; + } + if (CreateThreadPool(false)) { MS_LOG(ERROR) << 
"CreateThreadPool failed."; return RET_ERROR; diff --git a/mindspore/lite/src/litert/inner_context.h b/mindspore/lite/src/litert/inner_context.h index 88281eb1..8735961c 100644 --- a/mindspore/lite/src/litert/inner_context.h +++ b/mindspore/lite/src/litert/inner_context.h @@ -71,12 +71,26 @@ typedef struct CustomDeviceInfo { std::shared_ptr user_defined_device_info_; } CustomDeviceInfo; +typedef struct Extension { + std::string name; // config name + std::vector value; // config value +} Extension; + +typedef struct NNRtDeviceInfo { + size_t device_id_ = 0; + int priority_ = 0; + int performance_mode_ = 0; + bool enable_fp16_ = false; + std::vector extensions_; +} NNRtDeviceInfo; + struct DeviceInfo { CpuDeviceInfo cpu_device_info_; GpuDeviceInfo gpu_device_info_; NpuDeviceInfo npu_device_info_; AscendDeviceInfo ascend_device_info_; CustomDeviceInfo custom_device_info_; + NNRtDeviceInfo nnrt_device_info_; }; struct DeviceContext { diff --git a/mindspore/lite/src/litert/kernel/cpu/BUILD.gn b/mindspore/lite/src/litert/kernel/cpu/BUILD.gn index 48308425..65065b5b 100644 --- a/mindspore/lite/src/litert/kernel/cpu/BUILD.gn +++ b/mindspore/lite/src/litert/kernel/cpu/BUILD.gn @@ -13,6 +13,10 @@ cpu_kernel_sources = [ "base/call.cc", "base/constant_of_shape.cc", "base/convolution_base.cc", + "base/custom_base.cc", + "base/custom_masked_fill.cc", + "base/custom_is_inf.cc", + "base/custom_tensor_scatter.cc", "base/detection_post_process_base.cc", "base/format_transpose.cc", "base/group_convolution_base.cc", @@ -37,7 +41,6 @@ cpu_kernel_sources = [ "fp32/batchnorm_fp32.cc", "fp32/batch_to_space_fp32.cc", "fp32/broadcast_to_fp32.cc", - "fp32/cast_for_x86_fp16.cc", "fp32/cast_fp32.cc", "fp32/convolution_1x1_fp32.cc", "fp32/convolution_delegate_fp32.cc", @@ -118,6 +121,10 @@ cpu_kernel_sources = [ "fp32/online_fusion/split_reduce_concat_fp32.cc", ] +if ((target_cpu != "arm") && (target_cpu != "arm64")) { + cpu_kernel_sources += [ "src/runtime/kernel/cpu/fp32/cast_for_x86_fp16.cc" ] +} + arm64_cpu_kernel_sources = [ "fp32/convolution_im2col_arm64_fp32.cc", "fp32/matmul_fp32_arm64.cc", @@ -142,6 +149,42 @@ sse_avx_avx512_kernel_sources = [ "fp32/matmul_fp32_avx512.cc", ] +fp16_kernel_sources = [ + "fp16/batchnorm_fp16.cc", + "fp16/biasadd_fp16.cc", + "fp16/cast_fp16.cc", + "fp16/common_fp16.cc", + "fp16/convolution_1x1_fp16.cc", + "fp16/convolution_delegate_fp16.cc", + "fp16/convolution_depthwise_3x3_fp16.cc", + "fp16/convolution_depthwise_fp16.cc", + "fp16/convolution_depthwise_slidewindow_fp16.cc", + "fp16/convolution_fp16.cc", + "fp16/convolution_winograd_fp16.cc", + "fp16/custom_gru_fp16.cc", + "fp16/deconvolution_depthwise_fp16.cc", + "fp16/deconvolution_fp16.cc", + "fp16/deconvolution_winograd_fp16.cc", + "fp16/depth_to_space_fp16.cc", + "fp16/dynamic_quant_fp16.cc", + "fp16/fullconnection_fp16.cc", + "fp16/fused_batchnorm_fp16.cc", + "fp16/group_convolution_fp16.cc", + "fp16/gru_fp16.cc", + "fp16/instance_norm_fp16.cc", + "fp16/layout_transform_fp16.cc", + "fp16/lstm_fp16.cc", + "fp16/matmul_base_fp16.cc", + "fp16/matmul_fp16.cc", + "fp16/power_fp16.cc", + "fp16/prelu_fp16.cc", + "fp16/quant_dtype_cast_fp16.cc", + "fp16/reduce_fp16.cc", + "fp16/resize_fp16.cc", + "fp16/slice_fp16.cc", + "fp16/where_fp16.cc", +] + int8_kernel_sources = [ "int8/activation_int8.cc", "int8/add_int8.cc", @@ -227,6 +270,12 @@ all_cpu_kernel_sources += int8_kernel_sources all_cpu_kernel_sources += string_kernel_sources all_cpu_kernel_sources += control_kernel_sources +if (target_cpu == "arm64") { + all_cpu_kernel_sources 
+= fp16_kernel_sources +} else { + not_needed(fp16_kernel_sources) +} + if (target_cpu == "arm") { all_cpu_kernel_sources -= arm64_cpu_kernel_sources all_cpu_kernel_sources -= sse_avx_avx512_kernel_sources diff --git a/mindspore/lite/src/litert/kernel/cpu/base/custom_base.cc b/mindspore/lite/src/litert/kernel/cpu/base/custom_base.cc new file mode 100644 index 00000000..9921e063 --- /dev/null +++ b/mindspore/lite/src/litert/kernel/cpu/base/custom_base.cc @@ -0,0 +1,46 @@ +/** + * Copyright 2022 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "src/litert/kernel/cpu/base/custom_base.h" +#include +#include +#include +#include "src/litert/kernel_registry.h" +#include "nnacl/op_base.h" + +using mindspore::kernel::KERNEL_ARCH; +using mindspore::lite::KernelRegistrar; +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; +using mindspore::schema::PrimitiveType_Custom; + +namespace mindspore::kernel { +int CustomBaseCPUKernel::Prepare() { + return RET_OK; +} + +int CustomBaseCPUKernel::ReSize() { + return RET_OK; +} + +int CustomBaseCPUKernel::Run() { + return RET_OK; +} + +REG_KERNEL(kCPU, kNumberTypeInt32, PrimType_Inner_ThirdPartyModel, LiteKernelCreator) +REG_KERNEL(kCPU, kNumberTypeFloat32, PrimType_Inner_ThirdPartyModel, LiteKernelCreator) +REG_KERNEL(kCPU, kNumberTypeBool, PrimType_Inner_ThirdPartyModel, LiteKernelCreator) +} // namespace mindspore::kernel diff --git a/mindspore/lite/src/litert/kernel/cpu/base/custom_base.h b/mindspore/lite/src/litert/kernel/cpu/base/custom_base.h new file mode 100644 index 00000000..ecb4c72d --- /dev/null +++ b/mindspore/lite/src/litert/kernel/cpu/base/custom_base.h @@ -0,0 +1,43 @@ +/** + * Copyright 2022 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_SRC_LITERT_KERNEL_CPU_BASE_CUSTOM_BASE_H_ +#define MINDSPORE_LITE_SRC_LITERT_KERNEL_CPU_BASE_CUSTOM_BASE_H_ + +#include +#include "src/litert/lite_kernel.h" +#include "nnacl/custom_parameter.h" + +namespace mindspore::kernel { +class CustomBaseCPUKernel : public LiteKernel { + public: + CustomBaseCPUKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs, const lite::InnerContext *ctx) + : LiteKernel(parameter, inputs, outputs, ctx) { + custom_param_ = reinterpret_cast(op_parameter_); + } + ~CustomBaseCPUKernel() override = default; + + int Prepare() override; + int ReSize() override; + int Run() override; + + private: + CustomParameter *custom_param_ = nullptr; +}; +} // namespace mindspore::kernel + +#endif // MINDSPORE_LITE_SRC_LITERT_KERNEL_CPU_BASE_CUSTOM_BASE_H_ diff --git a/mindspore/lite/src/litert/kernel/cpu/base/custom_is_inf.cc b/mindspore/lite/src/litert/kernel/cpu/base/custom_is_inf.cc new file mode 100644 index 00000000..edffea42 --- /dev/null +++ b/mindspore/lite/src/litert/kernel/cpu/base/custom_is_inf.cc @@ -0,0 +1,61 @@ +/** + * Copyright 2023 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "src/litert/kernel_registry.h" +#include "include/errorcode.h" +#include "src/litert/kernel/cpu/base/custom_is_inf.h" +#include "src/common/tensor_util.h" +#include "nnacl/op_base.h" + +using mindspore::lite::KernelRegistrar; +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; + +namespace mindspore::kernel { + +int CustomIsInfCPUKernel::Prepare() { + CHECK_LESS_RETURN(in_tensors_.size(), C1NUM); + CHECK_LESS_RETURN(out_tensors_.size(), C1NUM); + return RET_OK; +} + +int CustomIsInfCPUKernel::ReSize() { return RET_OK; } + +void CustomIsInfCPUKernel::LaunchKernelFloat(const float *input, bool *output) { + auto elem_num = in_tensors_[FIRST_INPUT]->ElementsNum(); + + for (int i = 0; i < elem_num; i++) { + output[i] = std::isinf(input[i]); + } +} + +int CustomIsInfCPUKernel::Run() { + auto input = in_tensors_[FIRST_INPUT]; + auto output = out_tensors_[FIRST_INPUT]; + CHECK_NULL_RETURN(input); + CHECK_NULL_RETURN(output); + + if (input->data_type() == kNumberTypeFloat32 || input->data_type() == kNumberTypeFloat) { + LaunchKernelFloat(reinterpret_cast(input->data()), reinterpret_cast(output->data())); + } else { + MS_LOG(ERROR) << "unsupported input data type " << input->data_type(); + return RET_ERROR; + } + + return RET_OK; +} + +REG_KERNEL(kCPU, kNumberTypeFloat32, PrimType_Inner_CustomIsInf, LiteKernelCreator) +} // namespace mindspore::kernel diff --git a/mindspore/lite/src/litert/kernel/cpu/base/custom_is_inf.h b/mindspore/lite/src/litert/kernel/cpu/base/custom_is_inf.h new file mode 100644 index 00000000..e63d8ec7 --- /dev/null +++ b/mindspore/lite/src/litert/kernel/cpu/base/custom_is_inf.h @@ -0,0 +1,38 @@ +/** + * Copyright 2023 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this 
file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_IS_INF_CPU_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_IS_INF_CPU_H_ + +#include +#include "src/litert/lite_kernel.h" + +namespace mindspore::kernel { +class CustomIsInfCPUKernel : public LiteKernel { + public: + CustomIsInfCPUKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs, const lite::InnerContext *ctx) + : LiteKernel(parameter, inputs, outputs, ctx) {} + ~CustomIsInfCPUKernel() override = default; + int Prepare() override; + int ReSize() override; + int Run() override; + + private: + void LaunchKernelFloat(const float *input, bool *output); +}; +} // namespace mindspore::kernel + +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_IS_INF_CPU_H_ diff --git a/mindspore/lite/src/litert/kernel/cpu/base/custom_masked_fill.cc b/mindspore/lite/src/litert/kernel/cpu/base/custom_masked_fill.cc new file mode 100644 index 00000000..9af1af5d --- /dev/null +++ b/mindspore/lite/src/litert/kernel/cpu/base/custom_masked_fill.cc @@ -0,0 +1,84 @@ +/** + * Copyright 2023 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "src/litert/kernel_registry.h" +#include "include/errorcode.h" +#include "src/litert/kernel/cpu/base/custom_masked_fill.h" +#include "src/common/tensor_util.h" +#include "nnacl/op_base.h" + +using mindspore::lite::KernelRegistrar; +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; + +namespace mindspore::kernel { + +int CustomMaskedFillCPUKernel::Prepare() { + CHECK_LESS_RETURN(in_tensors_.size(), C3NUM); + CHECK_LESS_RETURN(out_tensors_.size(), C1NUM); + + // only support input value as a single float value + MS_CHECK_TRUE_MSG(in_tensors_[FIRST_INPUT]->data_type() == mindspore::TypeId::kNumberTypeFloat32 || + in_tensors_[FIRST_INPUT]->data_type() == mindspore::TypeId::kNumberTypeFloat, + RET_ERROR, "input dtype must be float32"); + if (in_tensors_[THIRD_INPUT]->ElementsNum() != 1) { + MS_LOG(ERROR) << "only support fill value as a single float"; + return RET_ERROR; + } + MS_CHECK_TRUE_MSG(in_tensors_[SECOND_INPUT]->data_type() == mindspore::TypeId::kNumberTypeBool, RET_ERROR, + "mask dtype must be bool"); + if (!InferShapeDone()) { + return RET_OK; + } + return ReSize(); +} + +int CustomMaskedFillCPUKernel::ReSize() { return RET_OK; } + +int CustomMaskedFillCPUKernel::Run() { + auto input = in_tensors_[FIRST_INPUT]; + auto mask = in_tensors_[SECOND_INPUT]; + auto value = in_tensors_[THIRD_INPUT]; + auto output = out_tensors_[FIRST_INPUT]; + CHECK_NULL_RETURN(input); + CHECK_NULL_RETURN(mask); + CHECK_NULL_RETURN(value); + CHECK_NULL_RETURN(output); + + if (input->shape() != mask->shape()) { + MS_LOG(ERROR) << "Not support broadcast mask to input"; + return RET_ERROR; + } + + auto value_data = reinterpret_cast(value->data()); + auto fill_value = value_data[0]; + + auto data_num = input->ElementsNum(); + auto input_data = reinterpret_cast(input->data()); + auto mask_data = reinterpret_cast(mask->data()); + auto output_data = reinterpret_cast(output->data()); + for (int64_t i = 0; i < data_num; i++) { + if (mask_data[i]) { + output_data[i] = fill_value; + } else { + output_data[i] = input_data[i]; + } + } + + return RET_OK; +} + +REG_KERNEL(kCPU, kNumberTypeFloat32, PrimType_Inner_CustomMaskedFill, LiteKernelCreator) +} // namespace mindspore::kernel diff --git a/mindspore/lite/src/litert/kernel/cpu/base/custom_masked_fill.h b/mindspore/lite/src/litert/kernel/cpu/base/custom_masked_fill.h new file mode 100644 index 00000000..04a2dcab --- /dev/null +++ b/mindspore/lite/src/litert/kernel/cpu/base/custom_masked_fill.h @@ -0,0 +1,35 @@ +/** + * Copyright 2023 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_CUSTOM_MASKED_FILL_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_CUSTOM_MASKED_FILL_H_ + +#include +#include "src/litert/lite_kernel.h" + +namespace mindspore::kernel { +class CustomMaskedFillCPUKernel : public LiteKernel { + public: + CustomMaskedFillCPUKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs, const lite::InnerContext *ctx) + : LiteKernel(parameter, inputs, outputs, ctx) {} + ~CustomMaskedFillCPUKernel() override = default; + int Prepare() override; + int ReSize() override; + int Run() override; +}; +} // namespace mindspore::kernel + +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_CUSTOM_MASKED_FILL_H_ diff --git a/mindspore/lite/src/litert/kernel/cpu/base/custom_tensor_scatter.cc b/mindspore/lite/src/litert/kernel/cpu/base/custom_tensor_scatter.cc new file mode 100644 index 00000000..d52d67d5 --- /dev/null +++ b/mindspore/lite/src/litert/kernel/cpu/base/custom_tensor_scatter.cc @@ -0,0 +1,75 @@ +/** + * Copyright 2022 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "src/litert/kernel/cpu/base/custom_tensor_scatter.h" +#include +#include "schema/model_generated.h" +#include "src/litert/kernel_registry.h" +#include "include/errorcode.h" +#include "nnacl/base/scatter_nd_binary.h" + +using mindspore::kernel::KERNEL_ARCH; +using mindspore::lite::KernelRegistrar; +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; + +namespace mindspore::kernel { +namespace { +int TensorScatterRun(void *cdata, int task_id, float, float) { + auto kernel = static_cast(cdata); + CHECK_NULL_RETURN(kernel); + return kernel->TensorScatterDispatch(task_id); +} +} // namespace + +int CustomTensorScatterCPUKernel::TensorScatterDispatch(int task_id) { + auto data_type = in_tensors_[kScatterUpdateInputIndex]->data_type(); + if (data_type != kNumberTypeFloat32) { + MS_LOG(ERROR) << "TensorScatterMax only support float32 input tensor, but got " << data_type; + return RET_ERROR; + } + int type = data_type == kNumberTypeFloat32 ? 
0 : 1; + // Multi-threading is not stable for this op yet; force single-threaded execution. + param_->op_parameter.thread_num_ = 1; + auto ret = ScatterNDMax(in_tensors_[kScatterUpdateIndex]->data(), out_tensors_[kOutputIndex]->data(), + output_unit_offsets_.data(), param_, type, task_id); + if (ret != RET_OK) { + MS_LOG(ERROR) << "ScatterNDMax failed, ret: " << ret; + return RET_ERROR; + } + return RET_OK; +} + +int CustomTensorScatterCPUKernel::Run() { + auto in_tensor = in_tensors().front(); + auto out_tensor = out_tensors().front(); + (void)memcpy(out_tensor->data(), in_tensor->data(), in_tensor->Size()); + auto indices = in_tensors_.at(kScatterIndicesIndex); + if (!indices->IsConst() && ReSize() != RET_OK) { + MS_LOG(ERROR) << "CustomTensorScatter resize failed."; + return RET_ERROR; + } + + auto ret = ParallelLaunch(ms_context_, TensorScatterRun, this, op_parameter_->thread_num_); + if (ret != RET_OK) { + MS_LOG(ERROR) << "CustomTensorScatter failed, error_code[" << ret << "]"; + } + return ret; +} + +REG_KERNEL(kCPU, kNumberTypeFloat32, PrimType_Inner_CustomTensorScatterMax, + LiteKernelCreator) +} // namespace mindspore::kernel diff --git a/mindspore/lite/src/litert/kernel/cpu/base/custom_tensor_scatter.h b/mindspore/lite/src/litert/kernel/cpu/base/custom_tensor_scatter.h new file mode 100644 index 00000000..e39733c5 --- /dev/null +++ b/mindspore/lite/src/litert/kernel/cpu/base/custom_tensor_scatter.h @@ -0,0 +1,36 @@ +/** + * Copyright 2022 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_SRC_LITERT_KERNEL_CPU_BASE_TENSOR_SCATTER_ADD_H_ +#define MINDSPORE_LITE_SRC_LITERT_KERNEL_CPU_BASE_TENSOR_SCATTER_ADD_H_ + +#include +#include "src/litert/kernel/cpu/base/scatter_nd_binary.h" + +namespace mindspore::kernel { +class CustomTensorScatterCPUKernel : public ScatterNDBinaryCPUKernel { + public: + explicit CustomTensorScatterCPUKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs, const lite::InnerContext *ctx) + : ScatterNDBinaryCPUKernel(parameter, inputs, outputs, ctx) {} + ~CustomTensorScatterCPUKernel() override = default; + + int Run() override; + int TensorScatterDispatch(int task_id); +}; +} // namespace mindspore::kernel + +#endif // MINDSPORE_LITE_SRC_LITERT_KERNEL_CPU_BASE_TENSOR_SCATTER_ADD_H_ diff --git a/mindspore/lite/src/litert/lite_model.cc b/mindspore/lite/src/litert/lite_model.cc index 2c5bc658..13652633 100644 --- a/mindspore/lite/src/litert/lite_model.cc +++ b/mindspore/lite/src/litert/lite_model.cc @@ -98,6 +98,8 @@ int LiteModel::ConvertSubGraph(const schema::SubGraph &sub_graph) { if (sub_graph.name() == nullptr || sub_graph.inputIndices() == nullptr || sub_graph.outputIndices() == nullptr || sub_graph.tensorIndices() == nullptr) { MS_LOG(ERROR) << "sub_graph is invalid"; + MS_LOG(ERROR) << "sub_graph.name() = " << sub_graph.name() << ", sub_graph.inputIndices() = " << sub_graph.inputIndices() + << ", sub_graph.outputIndices() = " << sub_graph.outputIndices() << ", sub_graph.tensorIndices() = " << sub_graph.tensorIndices(); return RET_ERROR; } @@ -620,6 +622,33 @@ Model *ImportFromBuffer(const char *model_buf, size_t size, bool take_buf, minds return model; } +std::string LiteGraph::ToString() const { + std::stringstream ss; + ss << "all_nodes: " << all_nodes_.size() << std::endl; + for (size_t i = 0; i < all_nodes_.size(); i++) { + ss << "- node " << i << ": " << all_nodes_[i]->primitive_ << std::endl; + ss << "- node " << i << " input_indices_: " << all_nodes_[i]->input_indices_ << std::endl; + ss << "- node " << i << " output_indices_: " << all_nodes_[i]->output_indices_ << std::endl; + } + ss << "all_tensors: " << all_tensors_.size() << std::endl; + for (size_t i = 0; i < all_tensors_.size(); i++) { + ss << "- tensor " << i << ": " << all_tensors_[i] << std::endl; + } + ss << "input_indices: " << input_indices_<< std::endl; + ss << "output_indices: " << output_indices_ << std::endl; + + ss << "subgraphs: " << std::endl; + int count = 0; + for (auto subgraph: sub_graphs_) { + ss << "- subgraph " << count++ << std::endl; + ss << "--- subgraph input " << subgraph->input_indices_ << std::endl; + ss << "--- subgraph output " << subgraph->output_indices_ << std::endl; + ss << "--- subgraph node " << subgraph->node_indices_ << std::endl; + ss << "--- subgraph tensor " << subgraph->tensor_indices_ << std::endl; + } + return ss.str(); +} + Model *Model::Import(const char *model_buf, size_t size) { return ImportFromBuffer(model_buf, size, false); } Model *Model::Import(const char *filename) { return ImportFromPath(filename); } diff --git a/mindspore/lite/src/litert/lite_session.cc b/mindspore/lite/src/litert/lite_session.cc index 8f54879e..f635c8d2 100644 --- a/mindspore/lite/src/litert/lite_session.cc +++ b/mindspore/lite/src/litert/lite_session.cc @@ -67,6 +67,9 @@ #include "thread/parallel_thread_pool_manager.h" #endif #include "src/litert/runtime_packed_node_pass.h" +#ifdef SUPPORT_NNRT +#include "src/litert/delegate/nnrt/nnrt_delegate.h" +#endif using AbstractBaseModel = 
mindspore::infer::AbstractBaseModel; @@ -635,12 +638,6 @@ int LiteSession::CompileGraph(Model *model) { MarkSharedWeight(kernels_); FreePackOpWeight(kernels_); - ret = RuntimeAllocatorInit(); - if (ret != RET_OK) { - MS_LOG(ERROR) << "Runtime allocator init failed."; - is_running_.store(false); - return ret; - } infer_along_running_ = infer_along_running_ && (runtime_allocator_ == nullptr); if (infer_along_running_) { this->context_->set_infer_checker(InferCheckerAll); @@ -1092,6 +1089,27 @@ int LiteSession::CreateCoreMLDelegate() { return RET_OK; } +int LiteSession::CreateNNRTDelegate() { +#if SUPPORT_NNRT + auto iter = std::find_if(context_->device_list_.begin(), context_->device_list_.end(), + [](DeviceContext &device) { return device.device_type_ == lite::DT_NNRT; }); + if(iter == context_->device_list_.end()) { + MS_LOG(ERROR) << "Found non NNRT device info"; + return RET_ERROR; + } + + delegate_ = std::make_shared(iter->device_info_.nnrt_device_info_); + if (delegate_ == nullptr) { + MS_LOG(ERROR) << "New NNRT delegate failed"; + return RET_ERROR; + } +// ((NNRTDelegate *)(delegate_.get()))->SetMetaGraph(this->model_->buf); + delegate_device_type_ = DT_NNRT; + this->context_->delegate = delegate_; +#endif + return RET_OK; +}; + int LiteSession::DelegateInit() { #ifndef DELEGATE_CLIP int ret = RET_OK; @@ -1115,6 +1133,8 @@ int LiteSession::DelegateInit() { ret = CreateNPUDelegate(); } else if (context_->IsDeviceTypeEnabled(DT_GPU)) { ret = CreateTensorRTDelegate(); + } else if (context_->IsDeviceTypeEnabled(DT_NNRT)) { + ret = CreateNNRTDelegate(); } } @@ -1496,12 +1516,6 @@ int LiteSession::Resize(const std::vector &inputs, return ret; } - if (RuntimeAllocatorInit() != RET_OK) { - MS_LOG(ERROR) << "Runtime allocator in resize failed."; - is_running_.store(false); - return RET_ERROR; - } - auto status = GraphOptimizePass(&kernels_); if (status != RET_OK) { MS_LOG(ERROR) << "GraphOptimizePass failed."; @@ -2022,7 +2036,6 @@ int lite::LiteSession::LoadModelAndCompileByPath(const std::string &model_path, delete model; return RET_ERROR; } - model->Free(); set_model(model); return RET_OK; } diff --git a/mindspore/lite/src/litert/lite_session.h b/mindspore/lite/src/litert/lite_session.h index f8f8fe08..64a5f6d3 100644 --- a/mindspore/lite/src/litert/lite_session.h +++ b/mindspore/lite/src/litert/lite_session.h @@ -178,6 +178,7 @@ class MS_API LiteSession { int CreateNPUDelegate(); int CreateNNAPIDelegate(); int CreateCoreMLDelegate(); + int CreateNNRTDelegate(); int DelegateInit(); int InitGPURuntime(); int InitSharedThreadPool(); diff --git a/mindspore/lite/src/litert/scheduler.cc b/mindspore/lite/src/litert/scheduler.cc index 11382b09..199b4361 100644 --- a/mindspore/lite/src/litert/scheduler.cc +++ b/mindspore/lite/src/litert/scheduler.cc @@ -60,6 +60,9 @@ #if defined(PARALLEL_INFERENCE) && defined(ENABLE_MINDRT) #include "thread/parallel_thread_pool_manager.h" #endif +#ifdef SUPPORT_NNRT +#include "src/litert/delegate/nnrt/nnrt_delegate.h" +#endif using AbstractBaseModel = mindspore::infer::AbstractBaseModel; @@ -368,6 +371,7 @@ STATUS Scheduler::DelQuantDTypeCastKernel(std::vector *ker } int Scheduler::Schedule(std::vector *dst_kernels) { + MS_LOG(DEBUG) << "Start schedule."; int check_input_ret = CheckInputParam(dst_kernels); if (check_input_ret != RET_OK) { MS_LOG(ERROR) << "CheckInputParam failed! 
ret: " << check_input_ret; @@ -404,11 +408,13 @@ int Scheduler::Schedule(std::vector *dst_kernels) { } shape_fusion_pass_->StoreStateAndReset(); + MS_LOG(DEBUG) << "Start to init delegate kernels."; ret = InitDelegateKernels(dst_kernels); if (ret != RET_OK) { MS_LOG(ERROR) << "Repalce delegate kernels failed."; return ret; } + MS_LOG(DEBUG) << "Finish to init delegate kernels."; ret = CheckCpuValid(dst_kernels); if (ret != RET_OK) { @@ -500,6 +506,17 @@ int Scheduler::ReplaceDelegateKernels(std::vector *dst_ker MS_LOG(ERROR) << "New delegate model failed."; return RET_NULL_PTR; } + +#ifdef SUPPORT_NNRT + if (context_->IsDeviceTypeEnabled(DT_NNRT)) { + auto delegate = static_cast(delegate_.get()); + delegate->ShallowCopyLiteGraph(this->src_model_->graph_); + void *meta_graph = reinterpret_cast(const_cast( + mindspore::schema::GetMetaGraph(this->src_model_->buf))); + delegate->SetMetaGraph(meta_graph); + } +#endif + auto ret = delegate_->Build(model); if (ret != mindspore::kSuccess) { delete model; diff --git a/mindspore/lite/src/litert/tensor_category.cc b/mindspore/lite/src/litert/tensor_category.cc index 70d13865..e57cdb28 100644 --- a/mindspore/lite/src/litert/tensor_category.cc +++ b/mindspore/lite/src/litert/tensor_category.cc @@ -30,5 +30,9 @@ Category TensorCategory(const schema::Tensor &tensor) { auto data_size = tensor.data() == nullptr ? 0 : tensor.data()->size(); return TensorCategory(tensor.nodeType(), shape_num, TypeId(tensor.dataType()), data_size); } + +bool IsConstTensor(const schema::Tensor &tensor) { + return TensorCategory(tensor) != Category::VAR; +} } // namespace lite } // namespace mindspore diff --git a/mindspore/lite/src/litert/tensor_category.h b/mindspore/lite/src/litert/tensor_category.h index 83273032..70e65b31 100644 --- a/mindspore/lite/src/litert/tensor_category.h +++ b/mindspore/lite/src/litert/tensor_category.h @@ -35,6 +35,7 @@ enum Category { Category TensorCategory(const int node_type, const size_t shape_num, const TypeId data_type, const size_t data_size); Category TensorCategory(const schema::Tensor &tensor); +bool IsConstTensor(const schema::Tensor &tensor); } // namespace lite } // namespace mindspore #endif // MINDSPORE_LITE_SRC_RUNTIME_TENSOR_CATEGORY_H_ diff --git a/mindspore/lite/test/CMakeLists.txt b/mindspore/lite/test/CMakeLists.txt index 60e240f0..78dab536 100644 --- a/mindspore/lite/test/CMakeLists.txt +++ b/mindspore/lite/test/CMakeLists.txt @@ -28,10 +28,14 @@ file(GLOB_RECURSE TEST_UT_SRC ${TEST_DIR}/ut/src/runtime/kernel/arm/common/*.cc ${TEST_DIR}/ut/src/runtime/kernel/arm/fp32/*.cc ${TEST_DIR}/ut/src/runtime/kernel/arm/string/*.cc - ${TEST_DIR}/ut/src/api/context_c_test.cc - ${TEST_DIR}/ut/src/api/model_c_test.cc - ${TEST_DIR}/ut/src/api/tensor_c_test.cc` +# ${TEST_DIR}/ut/src/api/context_c_test.cc +# ${TEST_DIR}/ut/src/api/model_c_test.cc +# ${TEST_DIR}/ut/src/api/tensor_c_test.cc` ) +if(MSLITE_ENABLE_NNRT) + list(APPEND TEST_UT_SRC ${TEST_DIR}/ut/src/runtime/nnrt_delegate/nnrt_delegate_tests.cc) +endif() + if(MSLITE_ENABLE_SERVER_INFERENCE) list(APPEND TEST_UT_SRC ${TEST_DIR}/ut/src/api/model_parallel_runner_test.cc) endif() @@ -86,7 +90,7 @@ endif() if(MSLITE_ENABLE_INT8) file(GLOB_RECURSE TEST_INT8_UT_SRC - ${TEST_DIR}/ut/src/runtime/kernel/arm/int8/*.cc +# ${TEST_DIR}/ut/src/runtime/kernel/arm/int8/*.cc ${TEST_DIR}/ut/nnacl/int8/*.cc ) list(APPEND TEST_UT_SRC ${TEST_INT8_UT_SRC}) @@ -118,6 +122,7 @@ if(MSLITE_ENABLE_CONVERTER) ${TEST_DIR}/ut/tools/converter/registry/*.cc ${TEST_DIR}/ut/tools/converter/parser/tflite/*.cc 
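For context on the DT_NNRT branch wired into LiteSession::DelegateInit() and Scheduler::Schedule() above: a minimal sketch, assuming the OpenHarmony C API, of how an application opts into the NNRT backend. The OH_AI_DeviceInfoCreate/OH_AI_ContextAddDeviceInfo calls mirror the benchmark_c_api.cc change later in this patch; the model path and the OH_AI_ModelBuildFromFile call are illustrative assumptions, not part of this change.

#include "include/c_api/context_c.h"
#include "include/c_api/model_c.h"

int BuildWithNNRT(void) {
  OH_AI_ContextHandle context = OH_AI_ContextCreate();
  /* An NNRT device in the context makes IsDeviceTypeEnabled(DT_NNRT) true,
   * so DelegateInit() takes the new CreateNNRTDelegate() path. */
  OH_AI_DeviceInfoHandle nnrt_info = OH_AI_DeviceInfoCreate(OH_AI_DEVICETYPE_NNRT);
  OH_AI_ContextAddDeviceInfo(context, nnrt_info);
  /* Keep a CPU device in the list as a fallback for kernels the NNRT backend cannot take. */
  OH_AI_DeviceInfoHandle cpu_info = OH_AI_DeviceInfoCreate(OH_AI_DEVICETYPE_CPU);
  OH_AI_ContextAddDeviceInfo(context, cpu_info);

  OH_AI_ModelHandle model = OH_AI_ModelCreate();
  /* "model.ms" is a placeholder path. */
  OH_AI_Status ret = OH_AI_ModelBuildFromFile(model, "model.ms", OH_AI_MODELTYPE_MINDIR, context);
  return ret == OH_AI_STATUS_SUCCESS ? 0 : 1;
}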
${TEST_DIR}/ut/tools/converter/api/*.cc + ${TEST_DIR}/ut/tools/converter/config_parser/*.cc ${TEST_DIR}/st/converter_test.cc ${TEST_DIR}/st/delegate_test.cc ${TEST_DIR}/st/mindrt_parallel_test.cc @@ -232,7 +237,7 @@ endif() if(MSLITE_ENABLE_CONVERTER) target_link_libraries(lite-test-converter tflite_parser_mid caffe_parser_mid - onnx_parser_mid tf_parser_mid) + onnx_parser_mid tf_parser_mid third_party_parser_mid) endif() if(MSLITE_ENABLE_MODEL_OBF) diff --git a/mindspore/lite/test/runtest.sh b/mindspore/lite/test/runtest.sh index c0d6d843..abdea6f4 100644 --- a/mindspore/lite/test/runtest.sh +++ b/mindspore/lite/test/runtest.sh @@ -80,6 +80,7 @@ if [ "$ENABLE_CONVERTER_TEST" = true ]; then ./lite-test-converter --gtest_filter="PassRegistryTest.TestRegistry" ./lite-test-converter --gtest_filter="TestConverterAPI.*" ./lite-test-converter --gtest_filter="SpecifyGraphOutputFormatTest*" + ./lite-test-converter --gtest_filter="TestThirdPartyParamParser.*" fi ./lite-test --gtest_filter="TestRegistry.TestAdd" ./lite-test --gtest_filter="TestRegistryCustomOp.TestCustomAdd" diff --git a/mindspore/lite/test/ut/test_data/third_party_model.cfg b/mindspore/lite/test/ut/test_data/third_party_model.cfg new file mode 100644 index 00000000..b5fcba75 --- /dev/null +++ b/mindspore/lite/test/ut/test_data/third_party_model.cfg @@ -0,0 +1,8 @@ +[third_party_model] +input_names=demo_in_0;demo_in_1;demo_in_2 +input_dtypes=float32;float16;float64 +input_shapes=1;2,3;4,5,6 +output_names=demo_out_0;demo_out_1;demo_out_2;demo_out_4 +output_dtypes=int32;int16;int8;uint8 +output_shapes=10;20,30;40;50,60,70 +extended_parameters=foo:foo_value;bar:bar_value diff --git a/mindspore/lite/test/ut/tools/converter/api/converter_api_test.cc b/mindspore/lite/test/ut/tools/converter/api/converter_api_test.cc index 549bdd72..e73afc0e 100644 --- a/mindspore/lite/test/ut/tools/converter/api/converter_api_test.cc +++ b/mindspore/lite/test/ut/tools/converter/api/converter_api_test.cc @@ -34,3 +34,13 @@ TEST(TestConverterAPI, ConvertCaffeWithNotExistWeight) { mindspore::Converter converter(mindspore::converter::FmkType::kFmkTypeCaffe, caffe_model, output_model, caffe_weight); ASSERT_FALSE(converter.Convert().IsOk()); } + +TEST(TestConverterAPI, ConvertThirdParty) { + std::string third_party_model = "./relu.mindir"; + std::string config_model = "./third_party_model.cfg"; + std::string output_model = "./demo_third_party.ms"; + + mindspore::Converter converter(mindspore::converter::FmkType::kFmkTypeThirdParty, third_party_model, output_model); + converter.SetConfigFile(config_model); + ASSERT_TRUE(converter.Convert().IsOk()); +} \ No newline at end of file diff --git a/mindspore/lite/test/ut/tools/converter/config_parser/third_party_param_parser_test.cc b/mindspore/lite/test/ut/tools/converter/config_parser/third_party_param_parser_test.cc new file mode 100644 index 00000000..c8eb5536 --- /dev/null +++ b/mindspore/lite/test/ut/tools/converter/config_parser/third_party_param_parser_test.cc @@ -0,0 +1,176 @@ +/** + * Copyright 2023 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
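In the [third_party_model] section above, semicolons separate per-tensor entries and commas separate the dimensions of one shape; extended_parameters carries opaque key:value pairs for the vendor backend. A sketch of how the example file is expected to parse into ThirdPartyModelParam, inferred from the ThirdPartyParamParser tests that follow (only the input side is shown; outputs follow the same pattern, and the element types are abbreviated from what the tests use):

// input_names=demo_in_0;demo_in_1;demo_in_2
std::vector<std::string> input_names = {"demo_in_0", "demo_in_1", "demo_in_2"};
// input_shapes=1;2,3;4,5,6  -- only fixed, positive dimensions are accepted
std::vector<std::vector<int64_t>> input_shapes = {{1}, {2, 3}, {4, 5, 6}};
// input_dtypes=float32;float16;float64
std::vector<TypeId> input_dtypes = {TypeId::kNumberTypeFloat32, TypeId::kNumberTypeFloat16,
                                    TypeId::kNumberTypeFloat64};
// extended_parameters=foo:foo_value;bar:bar_value  -- values are kept as raw bytes and
// read back with std::string(value.begin(), value.end()) in the tests below.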
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "gtest/gtest.h" +#include "tools/converter/config_parser/third_party_param_parser.h" + +using mindspore::ThirdPartyModelParam; +using mindspore::TypeId; +using mindspore::lite::RET_OK; +using mindspore::lite::ThirdPartyModelString; +using mindspore::lite::ThirdPartyParamParser; + +const ThirdPartyModelString kDemoSISOParam = { + // SISO is short for single-input-single-output. + .input_dtypes = "float32", + .input_shapes = "1,2,3,4", + .input_names = "siso_input", + .output_dtypes = "int32", + .output_shapes = "2", + .output_names = "siso_output", + .extended_parameters = "siso_foo:siso_foo_value;siso_bar:siso_bar_value", +}; + +const ThirdPartyModelString kDemoMIMOParam = { + // MIMO is short for multiple-input-multiple-output. + .input_dtypes = "float32;int8;float16", + .input_shapes = "1,2,3,4;5,6;7,8,9", + .input_names = "mimo_in_0;mimo_in_1;mimo_in_2", + .output_dtypes = "int32;float32", + .output_shapes = "2,4;10,20,30", + .output_names = "mimo_out_0;mimo_out_1", + .extended_parameters = "mimo_foo:mimo_foo_value;mimo_bar:mimo_bar_value", +}; + +TEST(TestThirdPartyParamParser, ParseSISOParam) { + ThirdPartyModelString param_string = kDemoSISOParam; + ThirdPartyModelParam result; + ASSERT_EQ(ThirdPartyParamParser::Parse(param_string, &result), RET_OK); + + ASSERT_EQ(result.input_names, std::vector{"siso_input"}); + ASSERT_EQ(result.input_shapes.size(), 1U); + std::vector expect_in_shape = {1, 2, 3, 4}; + ASSERT_EQ(result.input_shapes[0], expect_in_shape); + ASSERT_EQ(result.input_dtypes, std::vector{TypeId::kNumberTypeFloat32}); + + ASSERT_EQ(result.output_names, std::vector{"siso_output"}); + ASSERT_EQ(result.output_shapes.size(), 1U); + std::vector expect_out_shape = {2}; + ASSERT_EQ(result.output_shapes[0], expect_out_shape); + ASSERT_EQ(result.output_dtypes, std::vector{TypeId::kNumberTypeInt32}); + + const auto &ext_param = result.extended_parameters; + ASSERT_EQ(ext_param.size(), 2U); + ASSERT_TRUE(ext_param.find("siso_foo") != ext_param.end()); + auto expect_foo_value = ext_param.at("siso_foo"); + ASSERT_EQ(std::string(expect_foo_value.begin(), expect_foo_value.end()), "siso_foo_value"); + ASSERT_TRUE(ext_param.find("siso_bar") != ext_param.end()); + auto expect_bar_value = ext_param.at("siso_bar"); + ASSERT_EQ(std::string(expect_bar_value.begin(), expect_bar_value.end()), "siso_bar_value"); +} + +TEST(TestThirdPartyParamParser, ParseValidDtype) { + ThirdPartyModelString param_string = kDemoSISOParam; + const std::vector kValidDtypeStrings = { + "float64", "float32", "float16", "int64", "int32", "int16", "int8", "uint8", "bool", + }; + + const std::vector kExpects = { + TypeId::kNumberTypeFloat64, TypeId::kNumberTypeFloat32, TypeId::kNumberTypeFloat16, + TypeId::kNumberTypeInt64, TypeId::kNumberTypeInt32, TypeId::kNumberTypeInt16, + TypeId::kNumberTypeInt8, TypeId::kNumberTypeUInt8, TypeId::kNumberTypeBool}; + + for (size_t i = 0; i < kValidDtypeStrings.size(); i++) { + param_string.input_dtypes = kValidDtypeStrings[i]; + ThirdPartyModelParam result; + ASSERT_EQ(ThirdPartyParamParser::Parse(param_string, &result), RET_OK); + ASSERT_EQ(result.input_dtypes[0], kExpects[i]); + } +} + +TEST(TestThirdPartyParamParser, ParseInvalidDtype) { + ThirdPartyModelParam result; + ThirdPartyModelString param_string = kDemoSISOParam; + ASSERT_EQ(ThirdPartyParamParser::Parse(param_string, &result), RET_OK); + param_string.input_dtypes = "bad_dtype"; + 
ASSERT_NE(ThirdPartyParamParser::Parse(param_string, &result), RET_OK); +} + +TEST(TestThirdPartyParamParser, ParseValidShape) { + ThirdPartyModelString param_string = kDemoSISOParam; + param_string.input_shapes = "256,256,1024,96"; // Only support fixed shape. + ThirdPartyModelParam result; + ASSERT_EQ(ThirdPartyParamParser::Parse(param_string, &result), RET_OK); + std::vector expect = {256, 256, 1024, 96}; + ASSERT_EQ(result.input_shapes[0], expect); +} + +TEST(TestThirdPartyParamParser, ParseInvalidShape) { + ThirdPartyModelParam result; + ThirdPartyModelString param_string = kDemoSISOParam; + ASSERT_EQ(ThirdPartyParamParser::Parse(param_string, &result), RET_OK); + + param_string.input_shapes = "256,256,1024,-1"; + ASSERT_NE(ThirdPartyParamParser::Parse(param_string, &result), RET_OK); + + param_string.input_shapes = "256,256,0,96"; + ASSERT_NE(ThirdPartyParamParser::Parse(param_string, &result), RET_OK); + + param_string.input_shapes = "256,-256,1024,96"; + ASSERT_NE(ThirdPartyParamParser::Parse(param_string, &result), RET_OK); + + param_string.input_shapes = "256,foo,1024,96"; + ASSERT_NE(ThirdPartyParamParser::Parse(param_string, &result), RET_OK); +} + +TEST(TestThirdPartyParamParser, ParseDefaultName) { + ThirdPartyModelParam result; + ThirdPartyModelString param_string = kDemoSISOParam; + param_string.input_names = ""; + param_string.output_names = ""; + ASSERT_EQ(ThirdPartyParamParser::Parse(param_string, &result), RET_OK); + ASSERT_EQ(result.input_names[0], "in_0"); + ASSERT_EQ(result.output_names[0], "out_0"); +} + +TEST(TestThirdPartyParamParser, ParseMIMOParam) { + ThirdPartyModelString param_string = kDemoMIMOParam; + ThirdPartyModelParam result; + ASSERT_EQ(ThirdPartyParamParser::Parse(param_string, &result), RET_OK); + + std::vector expect_input_names = {"mimo_in_0", "mimo_in_1", "mimo_in_2"}; + ASSERT_EQ(result.input_names, expect_input_names); + std::vector> expect_input_shapes = {{1, 2, 3, 4}, {5, 6}, {7, 8, 9}}; + ASSERT_EQ(result.input_shapes, expect_input_shapes); + std::vector expect_input_dtypes = {TypeId::kNumberTypeFloat32, TypeId::kNumberTypeInt8, + TypeId::kNumberTypeFloat16}; + ASSERT_EQ(result.input_dtypes, expect_input_dtypes); + + std::vector expect_output_names = {"mimo_out_0", "mimo_out_1"}; + ASSERT_EQ(result.output_names, expect_output_names); + std::vector> expect_output_shapes = {{2, 4}, {10, 20, 30}}; + ASSERT_EQ(result.output_shapes, expect_output_shapes); + std::vector expect_output_dtypes = {TypeId::kNumberTypeInt32, TypeId::kNumberTypeFloat32}; + ASSERT_EQ(result.output_dtypes, expect_output_dtypes); +} + +TEST(TestThirdPartyParamParser, ParseMismatchedShapeAndDtypeSize) { + ThirdPartyModelString param_string = kDemoMIMOParam; + ThirdPartyModelParam result; + ASSERT_EQ(ThirdPartyParamParser::Parse(param_string, &result), RET_OK); + + param_string.input_shapes = "1,2,3,4;5,6"; // shape size is 2 while dtype size is 3. + ASSERT_NE(ThirdPartyParamParser::Parse(param_string, &result), RET_OK); +} + +TEST(TestThirdPartyParamParser, ParseMismatchedNameAndDtypeSize) { + ThirdPartyModelString param_string = kDemoMIMOParam; + ThirdPartyModelParam result; + ASSERT_EQ(ThirdPartyParamParser::Parse(param_string, &result), RET_OK); + + param_string.input_names = "mimo_in_0;mimo_in_1"; // name size is 2 while dtype size is 3. 
+ ASSERT_NE(ThirdPartyParamParser::Parse(param_string, &result), RET_OK); +} diff --git a/mindspore/lite/tools/benchmark/benchmark_base.cc b/mindspore/lite/tools/benchmark/benchmark_base.cc index 16b1e218..ebaa9212 100644 --- a/mindspore/lite/tools/benchmark/benchmark_base.cc +++ b/mindspore/lite/tools/benchmark/benchmark_base.cc @@ -323,7 +323,7 @@ int BenchmarkBase::CheckThreadNumValid() { int BenchmarkBase::CheckDeviceTypeValid() { if (flags_->device_ != "CPU" && flags_->device_ != "GPU" && flags_->device_ != "NPU" && - flags_->device_ != "Ascend310" && flags_->device_ != "Ascend310P") { + flags_->device_ != "Ascend310" && flags_->device_ != "Ascend310P" && flags_->device_ != "NNRT") { MS_LOG(ERROR) << "Device type:" << flags_->device_ << " is not supported."; std::cerr << "Device type:" << flags_->device_ << " is not supported." << std::endl; return RET_ERROR; diff --git a/mindspore/lite/tools/benchmark/benchmark_base.h b/mindspore/lite/tools/benchmark/benchmark_base.h index acdea21a..f818270c 100644 --- a/mindspore/lite/tools/benchmark/benchmark_base.h +++ b/mindspore/lite/tools/benchmark/benchmark_base.h @@ -122,7 +122,7 @@ class MS_API BenchmarkFlags : public virtual FlagParser { AddFlag(&BenchmarkFlags::in_data_file_, "inDataFile", "Input data file, if not set, use random input", ""); AddFlag(&BenchmarkFlags::group_info_file_, "GroupInfoFile", "Communication group info file", ""); AddFlag(&BenchmarkFlags::config_file_, "configFile", "Config file", ""); - AddFlag(&BenchmarkFlags::device_, "device", "CPU | GPU | NPU | Ascend310 | Ascend310P | Auto", "CPU"); + AddFlag(&BenchmarkFlags::device_, "device", "CPU | GPU | NPU | Ascend310 | Ascend310P | NNRT | Auto", "CPU"); AddFlag(&BenchmarkFlags::provider_, "provider", "device provider litert | tensorrt | mindrt", "litert"); AddFlag(&BenchmarkFlags::cpu_bind_mode_, "cpuBindMode", "Input 0 for NO_BIND, 1 for HIGHER_CPU, 2 for MID_CPU.", 1); // MarkPerformance diff --git a/mindspore/lite/tools/benchmark/benchmark_c_api.cc b/mindspore/lite/tools/benchmark/benchmark_c_api.cc index 252e65c6..cb0c56b0 100644 --- a/mindspore/lite/tools/benchmark/benchmark_c_api.cc +++ b/mindspore/lite/tools/benchmark/benchmark_c_api.cc @@ -125,6 +125,10 @@ int BenchmarkCApi::InitContext() { OH_AI_DeviceInfoSetFrequency(npu_device_info, kFrequencyDefault); OH_AI_ContextAddDeviceInfo(context_, npu_device_info); } + if (flags_->device_ == "NNRT") { + OH_AI_DeviceInfoHandle nnrt_device_info = OH_AI_DeviceInfoCreate(OH_AI_DEVICETYPE_NNRT); + OH_AI_ContextAddDeviceInfo(context_, nnrt_device_info); + } OH_AI_DeviceInfoHandle cpu_device_info = OH_AI_DeviceInfoCreate(OH_AI_DEVICETYPE_CPU); OH_AI_DeviceInfoSetEnableFP16(cpu_device_info, flags_->enable_fp16_); OH_AI_ContextAddDeviceInfo(context_, cpu_device_info); diff --git a/mindspore/lite/tools/benchmark/benchmark_unified_api.cc b/mindspore/lite/tools/benchmark/benchmark_unified_api.cc index bb36c168..c18111b6 100644 --- a/mindspore/lite/tools/benchmark/benchmark_unified_api.cc +++ b/mindspore/lite/tools/benchmark/benchmark_unified_api.cc @@ -521,6 +521,11 @@ int BenchmarkUnifiedApi::InitMSContext(const std::shared_ptr // InitMSContextForAscend(context, &device_list); } + if (flags_->device_ == "NNRT" || flags_->device_ == "Auto") { + std::shared_ptr nnrt_device_info = std::make_shared(); + device_list.push_back(nnrt_device_info); + } + // CPU priority is behind GPU and NPU std::shared_ptr device_info = std::make_shared(); device_info->SetEnableFP16(flags_->enable_fp16_); diff --git 
a/mindspore/lite/tools/benchmark_train/CMakeLists.txt b/mindspore/lite/tools/benchmark_train/CMakeLists.txt index 0c558524..1b9fc347 100644 --- a/mindspore/lite/tools/benchmark_train/CMakeLists.txt +++ b/mindspore/lite/tools/benchmark_train/CMakeLists.txt @@ -9,6 +9,9 @@ set(COMMON_SRC set(TEST_SRC ${CMAKE_CURRENT_SOURCE_DIR}/main.cc ${CMAKE_CURRENT_SOURCE_DIR}/net_train.cc + ${CMAKE_CURRENT_SOURCE_DIR}/net_train_base.cc + ${CMAKE_CURRENT_SOURCE_DIR}/run_net_train.cc + ${CMAKE_CURRENT_SOURCE_DIR}/net_train_c_api.cc ) # add static securec link library diff --git a/mindspore/lite/tools/benchmark_train/main.cc b/mindspore/lite/tools/benchmark_train/main.cc index abf3d9dd..76f85aa7 100644 --- a/mindspore/lite/tools/benchmark_train/main.cc +++ b/mindspore/lite/tools/benchmark_train/main.cc @@ -17,7 +17,8 @@ #include #include #include -#include "tools/benchmark_train/net_train.h" +#include +#include "tools/benchmark_train/run_net_train.h" void PrintMem() { std::string proc_file = "/proc/" + std::to_string(getpid()) + "/status"; diff --git a/mindspore/lite/tools/benchmark_train/net_runner.cc b/mindspore/lite/tools/benchmark_train/net_runner.cc index 9b63d29f..edf3e964 100644 --- a/mindspore/lite/tools/benchmark_train/net_runner.cc +++ b/mindspore/lite/tools/benchmark_train/net_runner.cc @@ -15,7 +15,7 @@ */ #include "tools/benchmark_train/net_runner.h" -#include "tools/benchmark_train/net_train.h" +#include "tools/benchmark_train/net_train_base.h" #include #include #include @@ -187,7 +187,7 @@ int NetRunner::CompareOutput(const std::vector &outputs) { auto output = tensor.Data(); size_t size; std::string output_file = flags_->data_file_ + std::to_string(i) + ".bin"; - auto bin_buf = std::unique_ptr(mindspore::lite::NetTrain::ReadFileBuf(output_file.c_str(), &size)); + auto bin_buf = std::unique_ptr(mindspore::lite::NetTrainBase::ReadFileBuf(output_file.c_str(), &size)); if (bin_buf == nullptr) { MS_LOG(ERROR) << "ReadFile return nullptr"; std::cout << "ReadFile return nullptr" << std::endl; @@ -200,7 +200,7 @@ int NetRunner::CompareOutput(const std::vector &outputs) { << ", read size: " << size << std::endl; return mindspore::kLiteError; } - float bias = mindspore::lite::NetTrain::CompareData(bin_buf.get(), tensor.ElementNum(), + float bias = mindspore::lite::NetTrainBase::CompareData(bin_buf.get(), tensor.ElementNum(), reinterpret_cast(output.get())); if (bias >= 0) { total_bias += bias; @@ -332,7 +332,7 @@ int NetRunner::ReadInputFile(std::vector *ms_inputs) { } size_t size; std::string file_name = flags_->in_data_file_ + std::to_string(i + 1) + ".bin"; - auto bin_buf = std::unique_ptr(mindspore::lite::NetTrain::ReadFileBuf(file_name.c_str(), &size)); + auto bin_buf = std::unique_ptr(mindspore::lite::NetTrainBase::ReadFileBuf(file_name.c_str(), &size)); if (bin_buf == nullptr) { MS_LOG(ERROR) << "ReadFile return nullptr"; std::cout << "ReadFile return nullptr" << std::endl; @@ -368,4 +368,4 @@ int CallBack(mindspore::lite::NetTrainFlags *flags) { return nr.Main(); } -int init = mindspore::lite::NetTrain::SetNr(CallBack); +int init = mindspore::lite::NetTrainBase::SetNr(CallBack); diff --git a/mindspore/lite/tools/benchmark_train/net_train.cc b/mindspore/lite/tools/benchmark_train/net_train.cc index d1150043..514bba53 100644 --- a/mindspore/lite/tools/benchmark_train/net_train.cc +++ b/mindspore/lite/tools/benchmark_train/net_train.cc @@ -31,74 +31,11 @@ namespace mindspore { namespace lite { -static const char *DELIM_SLASH = "/"; -constexpr const char *DELIM_COLON = ":"; -constexpr const char 
*DELIM_COMMA = ","; -constexpr int RET_TOO_BIG = -9; constexpr int kField0 = 0; constexpr int kField1 = 1; constexpr int kField2 = 2; constexpr int kField3 = 3; constexpr int kField4 = 4; -constexpr int kFieldsToPrint = 5; -constexpr int kPrintOffset = 4; -static const int kTHOUSAND = 1000; -constexpr int kDumpInputsAndOutputs = 0; -constexpr int kDumpOutputs = 2; - -const std::unordered_map kTypeIdMap{ - {kNumberTypeFloat16, "Float16"}, {kNumberTypeFloat, "Float32"}, {kNumberTypeFloat32, "Float32"}, - {kNumberTypeInt8, "Int8"}, {kNumberTypeInt16, "Int16"}, {kNumberTypeInt, "Int32"}, - {kNumberTypeInt32, "Int32"}, {kNumberTypeUInt8, "UInt8"}, {kNumberTypeUInt16, "UInt16"}, - {kNumberTypeUInt, "UInt32"}, {kNumberTypeUInt32, "UInt32"}, {kObjectTypeString, "String"}, - {kNumberTypeBool, "Bool"}, {kObjectTypeTensorType, "Tensor"}}; - -const std::unordered_map kTensorFormatMap{ - {mindspore::NCHW, "NCHW"}, {mindspore::NHWC, "NHWC"}, {mindspore::NHWC4, "NHWC4"}, {mindspore::HWKC, "HWKC"}, - {mindspore::HWCK, "HWCK"}, {mindspore::KCHW, "KCHW"}, {mindspore::CKHW, "CKHW"}, {mindspore::KHWC, "KHWC"}, - {mindspore::CHWK, "CHWK"}, {mindspore::HW, "HW"}, {mindspore::HW4, "HW4"}, {mindspore::NC, "NC"}, - {mindspore::NC4, "NC4"}, {mindspore::NC4HW4, "NC4HW4"}, {mindspore::NCDHW, "NCDHW"}}; - -std::function NetTrain::nr_cb_ = nullptr; - -int NetTrain::SetNr(std::function param) { - nr_cb_ = param; - return 0; -} - -float *NetTrain::ReadFileBuf(const std::string file, size_t *size) { - if (file.empty()) { - MS_LOG(ERROR) << "file is nullptr"; - return nullptr; - } - MS_ASSERT(size != nullptr); - std::string real_path = RealPath(file.c_str()); - std::ifstream ifs(real_path); - if (!ifs.good()) { - MS_LOG(ERROR) << "file: " << real_path << " is not exist"; - return nullptr; - } - - if (!ifs.is_open()) { - MS_LOG(ERROR) << "file: " << real_path << " open failed"; - return nullptr; - } - - ifs.seekg(0, std::ios::end); - *size = ifs.tellg(); - std::unique_ptr buf = std::make_unique(*size / sizeof(float) + 1); - if (buf == nullptr) { - MS_LOG(ERROR) << "malloc buf failed, file: " << real_path; - ifs.close(); - return nullptr; - } - - ifs.seekg(0, std::ios::beg); - ifs.read(reinterpret_cast(buf.get()), *size); - ifs.close(); - - return buf.release(); -} int NetTrain::GenerateInputData() { for (auto tensor : ms_inputs_for_api_) { @@ -120,28 +57,6 @@ int NetTrain::GenerateInputData() { return RET_OK; } -int NetTrain::LoadInput() { - inputs_buf_.clear(); - inputs_size_.clear(); - batch_num_ = 0; - if (flags_->in_data_file_.empty()) { - auto status = GenerateInputData(); - if (status != RET_OK) { - std::cerr << "Generate input data error " << status << std::endl; - MS_LOG(ERROR) << "Generate input data error " << status; - return status; - } - } else { - auto status = ReadInputFile(); - if (status != RET_OK) { - std::cerr << "Read Input File error, " << status << std::endl; - MS_LOG(ERROR) << "Read Input File error, " << status; - return status; - } - } - return RET_OK; -} - int NetTrain::LoadStepInput(size_t step) { if (step >= batch_num_) { auto cur_batch = step + 1; @@ -269,30 +184,6 @@ int NetTrain::CompareOutput() { } } -std::string GenerateOutputFileName(mindspore::MSTensor *tensor, const std::string &op_name, - const std::string &file_type, const size_t &idx) { - std::string file_name = op_name; - auto pos = file_name.find_first_of('/'); - while (pos != std::string::npos) { - file_name.replace(pos, 1, "."); - pos = file_name.find_first_of('/'); - } - file_name += "_" + file_type + "_" + std::to_string(idx) + 
"_shape_"; - for (const auto &dim : tensor->Shape()) { - file_name += std::to_string(dim) + "_"; - } - if (kTypeIdMap.find(static_cast(tensor->DataType())) != kTypeIdMap.end()) { - file_name += kTypeIdMap.at(static_cast(tensor->DataType())); - } - auto tensor_format = tensor->format(); - if (kTensorFormatMap.find(tensor_format) != kTensorFormatMap.end()) { - file_name += "_" + kTensorFormatMap.at(tensor_format) + ".bin"; - } - - file_name += ".bin"; - return file_name; -} - int NetTrain::MarkPerformance() { MS_LOG(INFO) << "Running train loops..."; std::cout << "Running train loops..." << std::endl; @@ -574,26 +465,6 @@ int NetTrain::CreateAndRunNetwork(const std::string &filename, const std::string return RET_OK; } -int NetTrain::RunNetTrain() { - auto file_name = flags_->model_file_.substr(flags_->model_file_.find_last_of(DELIM_SLASH) + 1); - bool is_train = (file_name.find("train") != std::string::npos) || !flags_->bb_model_file_.empty(); - auto status = CreateAndRunNetwork(flags_->model_file_, flags_->bb_model_file_, is_train, flags_->epochs_); - if (status != RET_OK) { - MS_LOG(ERROR) << "CreateAndRunNetwork failed for model " << flags_->model_file_ << ". Status is " << status; - std::cout << "CreateAndRunNetwork failed for model " << flags_->model_file_ << ". Status is " << status - << std::endl; - return status; - } - - status = CheckExecutionOfSavedModels(); // re-initialize sessions according to flags - if (status != RET_OK) { - MS_LOG(ERROR) << "Run CheckExecute error: " << status; - std::cout << "Run CheckExecute error: " << status << std::endl; - return status; - } - return RET_OK; -} - int NetTrain::SaveModels() { if (!flags_->export_file_.empty()) { if (flags_->bb_model_file_.empty()) { @@ -635,77 +506,6 @@ int NetTrain::SaveModels() { return RET_OK; } -int NetTrain::CheckExecutionOfSavedModels() { - int status = RET_OK; - if (!flags_->export_file_.empty()) { - status = NetTrain::CreateAndRunNetwork(flags_->export_file_, flags_->bb_model_file_, true, 0); - if (status != RET_OK) { - MS_LOG(ERROR) << "Run Exported model " << flags_->export_file_ << " error: " << status; - std::cout << "Run Exported model " << flags_->export_file_ << " error: " << status << std::endl; - return status; - } - if (flags_->bb_model_file_.empty()) { - status = NetTrain::CreateAndRunNetwork(flags_->export_file_ + "_qt", "", true, 0, false); - if (status != RET_OK) { - MS_LOG(ERROR) << "Run Exported model " << flags_->export_file_ << "_qt.ms error: " << status; - std::cout << "Run Exported model " << flags_->export_file_ << "_qt.ms error: " << status << std::endl; - return status; - } - } - } - if (!flags_->inference_file_.empty()) { - status = NetTrain::CreateAndRunNetwork(flags_->inference_file_, "", false, 0); - if (status != RET_OK) { - MS_LOG(ERROR) << "Running saved model " << flags_->inference_file_ << ".ms error: " << status; - std::cout << "Running saved model " << flags_->inference_file_ << ".ms error: " << status << std::endl; - return status; - } - status = NetTrain::CreateAndRunNetwork(flags_->inference_file_ + "_qt", "", false, 0, false); - if (status != RET_OK) { - MS_LOG(ERROR) << "Running saved model " << flags_->inference_file_ << "_qt.ms error: " << status; - std::cout << "Running saved model " << flags_->inference_file_ << "_qt.ms error: " << status << std::endl; - return status; - } - } - return status; -} - -void NetTrain::CheckSum(MSTensor *tensor, const std::string &node_type, int id, const std::string &in_out) { - if (tensor == nullptr) { - MS_LOG(ERROR) << "input tensor is 
nullptr."; - return; - } - int tensor_size = tensor->ElementNum(); - void *data = tensor->MutableData(); - auto *fdata = reinterpret_cast(tensor->MutableData()); - auto type = tensor->DataType(); - std::cout << node_type << " " << in_out << id << " shape=" << tensor->Shape() << " sum="; - switch (type) { - case mindspore::DataType::kNumberTypeFloat32: - TensorNan(reinterpret_cast(data), tensor_size); - std::cout << TensorSum(data, tensor_size) << std::endl; - std::cout << "tensor name: " << tensor->Name() << std::endl; - std::cout << "data: "; - for (int i = 0; i <= kPrintOffset && i < tensor_size; i++) { - std::cout << static_cast(fdata[i]) << ", "; - } - std::cout << std::endl; - break; - case mindspore::DataType::kNumberTypeInt32: - std::cout << TensorSum(data, tensor_size) << std::endl; - break; -#ifdef ENABLE_FP16 - case mindspore::DataType::kNumberTypeFloat16: - std::cout << TensorSum(data, tensor_size) << std::endl; - TensorNan(reinterpret_cast(data), tensor_size); - break; -#endif - default: - std::cout << "unsupported type:" << static_cast(type) << std::endl; - break; - } -} - int NetTrain::InitDumpTensorDataCallbackParameter() { // before callback before_call_back_ = [&](const std::vector &before_inputs, @@ -815,178 +615,6 @@ int NetTrain::InitTimeProfilingCallbackParameter() { return RET_OK; } -int NetTrain::InitCallbackParameter() { - int ret = RET_OK; - if (flags_->dump_tensor_data_) { - ret = InitDumpTensorDataCallbackParameter(); - } else if (flags_->time_profiling_) { - ret = InitTimeProfilingCallbackParameter(); - } - return ret; -} - -void NetTrainFlags::InitResizeDimsList() { - std::string content = this->resize_dims_in_; - std::vector shape; - auto shape_strs = StrSplit(content, std::string(DELIM_COLON)); - for (const auto &shape_str : shape_strs) { - shape.clear(); - auto dim_strs = StrSplit(shape_str, std::string(DELIM_COMMA)); - std::cout << "Resize Dims: "; - for (const auto &dim_str : dim_strs) { - std::cout << dim_str << " "; - shape.emplace_back(static_cast(std::stoi(dim_str))); - } - std::cout << std::endl; - this->resize_dims_.emplace_back(shape); - } -} - -int NetTrain::Init() { - if (this->flags_ == nullptr) { - return 1; - } - MS_LOG(INFO) << "ModelPath = " << this->flags_->model_file_; - MS_LOG(INFO) << "InDataPath = " << this->flags_->in_data_file_; - MS_LOG(INFO) << "InDataType = " << this->flags_->in_data_type_in_; - MS_LOG(INFO) << "Epochs = " << this->flags_->epochs_; - MS_LOG(INFO) << "AccuracyThreshold = " << this->flags_->accuracy_threshold_; - MS_LOG(INFO) << "WarmUpLoopCount = " << this->flags_->warm_up_loop_count_; - MS_LOG(INFO) << "NumThreads = " << this->flags_->num_threads_; - MS_LOG(INFO) << "expectedDataFile = " << this->flags_->data_file_; - MS_LOG(INFO) << "exportDataFile = " << this->flags_->export_file_; - MS_LOG(INFO) << "enableFp16 = " << this->flags_->enable_fp16_; - MS_LOG(INFO) << "virtualBatch = " << this->flags_->virtual_batch_; - - if (this->flags_->epochs_ < 0) { - MS_LOG(ERROR) << "epochs:" << this->flags_->epochs_ << " must be equal/greater than 0"; - std::cerr << "epochs:" << this->flags_->epochs_ << " must be equal/greater than 0" << std::endl; - return RET_ERROR; - } - - if (this->flags_->num_threads_ < 1) { - MS_LOG(ERROR) << "numThreads:" << this->flags_->num_threads_ << " must be greater than 0"; - std::cerr << "numThreads:" << this->flags_->num_threads_ << " must be greater than 0" << std::endl; - return RET_ERROR; - } - - this->flags_->in_data_type_ = this->flags_->in_data_type_in_ == "img" ? 
kImage : kBinary; - - if (flags_->in_data_file_.empty() && !flags_->data_file_.empty()) { - MS_LOG(ERROR) << "expectedDataFile not supported in case that inDataFile is not provided"; - std::cerr << "expectedDataFile is not supported in case that inDataFile is not provided" << std::endl; - return RET_ERROR; - } - - if (flags_->in_data_file_.empty() && !flags_->export_file_.empty()) { - MS_LOG(ERROR) << "exportDataFile not supported in case that inDataFile is not provided"; - std::cerr << "exportDataFile is not supported in case that inDataFile is not provided" << std::endl; - return RET_ERROR; - } - - if (flags_->model_file_.empty()) { - MS_LOG(ERROR) << "modelPath is required"; - std::cerr << "modelPath is required" << std::endl; - return 1; - } - - // get dump data output path - auto dump_cfg_path = std::getenv(dump::kConfigPath); - if (dump_cfg_path != nullptr) { - flags_->dump_tensor_data_ = true; - if (InitDumpConfigFromJson(dump_cfg_path) != RET_OK) { - MS_LOG(ERROR) << "parse dump config file failed."; - return RET_ERROR; - } - } else { - MS_LOG(INFO) << "No MINDSPORE_DUMP_CONFIG in env, don't need to dump data"; - } - - auto status = InitCallbackParameter(); - if (status != RET_OK) { - MS_LOG(ERROR) << "Init callback Parameter failed."; - std::cerr << "Init callback Parameter failed." << std::endl; - return RET_ERROR; - } - - flags_->InitResizeDimsList(); - if (!flags_->resize_dims_.empty() && !flags_->input_data_list_.empty() && - flags_->resize_dims_.size() != flags_->input_data_list_.size()) { - MS_LOG(ERROR) << "Size of input resizeDims should be equal to size of input inDataPath"; - std::cerr << "Size of input resizeDims should be equal to size of input inDataPath" << std::endl; - return RET_ERROR; - } - return RET_OK; -} - -namespace { -constexpr int kNumToPrint = 5; -} - -int NetTrain::InitDumpConfigFromJson(std::string path) { - auto real_path = RealPath(path.c_str()); - std::ifstream ifs(real_path); - if (!ifs.good()) { - MS_LOG(ERROR) << "file: " << real_path << " is not exist"; - return RET_ERROR; - } - if (!ifs.is_open()) { - MS_LOG(ERROR) << "file: " << real_path << " open failed"; - return RET_ERROR; - } - - try { - dump_cfg_json_ = nlohmann::json::parse(ifs); - } catch (const nlohmann::json::parse_error &error) { - MS_LOG(ERROR) << "parse json file failed, please check your file."; - return RET_ERROR; - } - if (dump_cfg_json_[dump::kSettings] == nullptr) { - MS_LOG(ERROR) << "\"common_dump_settings\" is required."; - return RET_ERROR; - } - if (dump_cfg_json_[dump::kSettings][dump::kMode] == nullptr) { - MS_LOG(ERROR) << "\"dump_mode\" is required."; - return RET_ERROR; - } - if (dump_cfg_json_[dump::kSettings][dump::kPath] == nullptr) { - MS_LOG(ERROR) << "\"path\" is required."; - return RET_ERROR; - } - if (dump_cfg_json_[dump::kSettings][dump::kNetName] == nullptr) { - dump_cfg_json_[dump::kSettings][dump::kNetName] = "default"; - } - if (dump_cfg_json_[dump::kSettings][dump::kInputOutput] == nullptr) { - dump_cfg_json_[dump::kSettings][dump::kInputOutput] = 0; - } - if (dump_cfg_json_[dump::kSettings][dump::kKernels] != nullptr && - !dump_cfg_json_[dump::kSettings][dump::kKernels].empty()) { - if (dump_cfg_json_[dump::kSettings][dump::kMode] == 0) { - MS_LOG(ERROR) << R"("dump_mode" should be 1 when "kernels" isn't empty.)"; - return RET_ERROR; - } - } - - auto abs_path = dump_cfg_json_[dump::kSettings][dump::kPath].get(); - auto net_name = dump_cfg_json_[dump::kSettings][dump::kNetName].get(); - if (abs_path.back() == '\\' || abs_path.back() == '/') { - 
dump_file_output_dir_ = abs_path + net_name; - } else { -#ifdef _WIN32 - dump_file_output_dir_ = abs_path + "\\" + net_name; -#else - dump_file_output_dir_ = abs_path + "/" + net_name; -#endif - } - - auto status = CreateOutputDir(&dump_file_output_dir_); - if (status != RET_OK) { - MS_LOG(ERROR) << "create data output directory failed."; - return RET_ERROR; - } - return RET_OK; -} - int NetTrain::PrintResult(const std::vector &title, const std::map> &result) { std::vector columnLenMax(kFieldsToPrint); @@ -1035,7 +663,7 @@ int NetTrain::PrintResult(const std::vector &title, } printf("-------------------------------------------------------------------------\n"); - for (int i = 0; i < kNumToPrint; i++) { + for (int i = 0; i < kFieldsToPrint; i++) { auto printBuf = title[i]; if (printBuf.size() > columnLenMax.at(i)) { columnLenMax.at(i) = printBuf.size(); @@ -1045,7 +673,7 @@ int NetTrain::PrintResult(const std::vector &title, } printf("\n"); for (auto &row : rows) { - for (int j = 0; j < kNumToPrint; j++) { + for (int j = 0; j < kFieldsToPrint; j++) { auto printBuf = row[j]; printBuf.resize(columnLenMax.at(j), ' '); printf("%s\t", printBuf.c_str()); @@ -1054,47 +682,5 @@ int NetTrain::PrintResult(const std::vector &title, } return RET_OK; } - -int RunNetTrain(int argc, const char **argv) { - NetTrainFlags flags; - Option err = flags.ParseFlags(argc, argv); - - if (err.IsSome()) { - std::cerr << err.Get() << std::endl; - std::cerr << flags.Usage() << std::endl; - return RET_ERROR; - } - - if (flags.help) { - std::cerr << flags.Usage() << std::endl; - return RET_OK; - } - if (flags.unified_api_) { - return NetTrain::RunNr(&flags); - } - NetTrain net_trainer(&flags); - auto status = net_trainer.Init(); - if (status != RET_OK) { - MS_LOG(ERROR) << "NetTrain init Error : " << status; - std::cerr << "NetTrain init Error : " << status << std::endl; - return RET_ERROR; - } - - status = net_trainer.RunNetTrain(); - if (status != RET_OK) { - MS_LOG(ERROR) << "Run NetTrain " - << flags.model_file_.substr(flags.model_file_.find_last_of(DELIM_SLASH) + 1).c_str() - << " Failed : " << status; - std::cerr << "Run NetTrain " << flags.model_file_.substr(flags.model_file_.find_last_of(DELIM_SLASH) + 1).c_str() - << " Failed : " << status << std::endl; - return RET_ERROR; - } - - MS_LOG(INFO) << "Run NetTrain " << flags.model_file_.substr(flags.model_file_.find_last_of(DELIM_SLASH) + 1).c_str() - << " Success."; - std::cout << "Run NetTrain " << flags.model_file_.substr(flags.model_file_.find_last_of(DELIM_SLASH) + 1).c_str() - << " Success." 
<< std::endl; - return RET_OK; -} } // namespace lite } // namespace mindspore diff --git a/mindspore/lite/tools/benchmark_train/net_train.h b/mindspore/lite/tools/benchmark_train/net_train.h index 67e58a04..bdf0ec88 100644 --- a/mindspore/lite/tools/benchmark_train/net_train.h +++ b/mindspore/lite/tools/benchmark_train/net_train.h @@ -42,183 +42,22 @@ #include "tools/common/flag_parser.h" #include "src/common/file_utils.h" #include "src/common/utils.h" - -#ifdef ENABLE_FP16 -static __attribute__((always_inline)) inline bool MS_ISNAN_FP16(float16_t var) { - volatile float16_t d = var; - return d != d; -} -#endif +#include "tools/benchmark_train/net_train_base.h" namespace mindspore::lite { -enum MS_API DataType { kImage = 0, kBinary = 1 }; - -constexpr float relativeTolerance = 1e-5; -constexpr float absoluteTolerance = 1e-8; extern const std::unordered_map kTypeIdMap; extern const std::unordered_map kTensorFormatMap; -namespace dump { -constexpr auto kConfigPath = "MINDSPORE_DUMP_CONFIG"; -constexpr auto kSettings = "common_dump_settings"; -constexpr auto kMode = "dump_mode"; -constexpr auto kPath = "path"; -constexpr auto kNetName = "net_name"; -constexpr auto kInputOutput = "input_output"; -constexpr auto kKernels = "kernels"; -} // namespace dump - -template -float TensorSum(const void *data, int size) { - const T *typed_data = reinterpret_cast(data); - float sum = 0.f; - for (int i = 0; i < size; i++) { - sum += static_cast(typed_data[i]); - } - return sum; -} - -class MS_API NetTrainFlags : public virtual FlagParser { +class MS_API NetTrain : public NetTrainBase { public: - NetTrainFlags() { - // common - AddFlag(&NetTrainFlags::model_file_, "modelFile", "Input model file", ""); - AddFlag(&NetTrainFlags::bb_model_file_, "bbModelFile", "Backboine model for transfer session", ""); - AddFlag(&NetTrainFlags::in_data_file_, "inDataFile", "Input data file, if not set, use random input", ""); - // MarkPerformance - AddFlag(&NetTrainFlags::warm_up_loop_count_, "warmUpLoopCount", "Run warm up loop", 0); - AddFlag(&NetTrainFlags::time_profiling_, "timeProfiling", "Run time profiling", false); - AddFlag(&NetTrainFlags::epochs_, "epochs", "Number of training epochs to run", 1); - AddFlag(&NetTrainFlags::num_threads_, "numThreads", "Run threads number", 1); - // MarkAccuracy - AddFlag(&NetTrainFlags::data_file_, "expectedDataFile", "Expected results data file path", ""); - AddFlag(&NetTrainFlags::export_file_, "exportFile", "MS File to export trained model into", ""); - AddFlag(&NetTrainFlags::accuracy_threshold_, "accuracyThreshold", "Threshold of accuracy", 0.5); - AddFlag(&NetTrainFlags::layer_checksum_, "layerCheckSum", "layer output checksum print (debug)", false); - AddFlag(&NetTrainFlags::enable_fp16_, "enableFp16", "Enable float16", false); - AddFlag(&NetTrainFlags::loss_name_, "lossName", "loss layer name", ""); - AddFlag(&NetTrainFlags::inference_file_, "inferenceFile", "MS file to export inference model", ""); - AddFlag(&NetTrainFlags::virtual_batch_, "virtualBatch", "use virtual batch", false); - AddFlag(&NetTrainFlags::resize_dims_in_, "inputShapes", - "Shape of input data, the format should be NHWC. e.g. 
1,32,32,32:1,1,32,32,1", ""); - AddFlag(&NetTrainFlags::unified_api_, "unifiedApi", "do unified api test", false); - } - - ~NetTrainFlags() override = default; - void InitResizeDimsList(); + explicit NetTrain(NetTrainFlags *flags) : NetTrainBase(flags) {} + virtual ~NetTrain() {} - public: - // common - std::string model_file_; - std::string in_data_file_; - std::string bb_model_file_; - std::vector input_data_list_; - DataType in_data_type_; - std::string in_data_type_in_ = "bin"; - int cpu_bind_mode_ = 1; - bool enable_fp16_ = false; - bool virtual_batch_ = false; - // MarkPerformance - int num_threads_ = 1; - int warm_up_loop_count_ = 0; - bool time_profiling_; - int epochs_ = 1; - // MarkAccuracy - std::string data_file_; - std::string data_type_ = "FLOAT"; - float accuracy_threshold_; - // Resize - std::string export_file_ = ""; - std::string resize_dims_in_ = ""; - bool layer_checksum_ = false; - std::vector> resize_dims_; - std::string loss_name_ = ""; - std::string inference_file_ = ""; - bool unified_api_ = false; - bool dump_tensor_data_ = false; -}; - -class MS_API NetTrain { - public: - explicit NetTrain(NetTrainFlags *flags) : flags_(flags) {} - virtual ~NetTrain() = default; - - int Init(); - int RunNetTrain(); - static float *ReadFileBuf(const std::string file, size_t *size); - static int SetNr(std::function param); - static int RunNr(NetTrainFlags *flags) { - if (nr_cb_ != nullptr) { - return nr_cb_(flags); - } - MS_LOG(WARNING) << "unified api was not tested"; - std::cout << "unified api was not tested"; - return RET_OK; - } - // tensorData need to be converter first - template - static float CompareData(const float *refOutput, int size, const T *msTensorData) { - size_t errorCount = 0; - float meanError = 0; - std::cout << "Out tensor size is: " << size << std::endl; - std::cout << "Data of model output: "; - for (int j = 0; j < std::min(50, size); j++) { - std::cout << static_cast(msTensorData[j]) << " "; - } - std::cout << std::endl; - std::cout << "Data of Ref output : "; - for (int j = 0; j < std::min(50, size); j++) { - std::cout << refOutput[j] << " "; - } - std::cout << std::endl; - for (int j = 0; j < size; j++) { - if (std::isnan(msTensorData[j]) || std::isinf(msTensorData[j])) { - std::cerr << "Output tensor has nan or inf data, compare fail" << std::endl; - MS_LOG(ERROR) << "Output tensor has nan or inf data, compare fail"; - return RET_ERROR; - } - - auto tolerance = absoluteTolerance + relativeTolerance * fabs(refOutput[j]); - auto absoluteError = std::fabs(static_cast(msTensorData[j]) - refOutput[j]); - if (absoluteError > tolerance) { - if (fabs(refOutput[j]) == 0) { - if (absoluteError > 1e-5) { - meanError += absoluteError; - errorCount++; - } else { - continue; - } - } else { - // just assume that atol = rtol - meanError += absoluteError / (fabs(refOutput[j]) + FLT_MIN); - errorCount++; - } - } - } - std::cout << std::endl; - if (meanError > 0.0f) { - meanError /= errorCount; - } - - if (meanError <= 0.0000001) { - std::cout << "Mean bias of tensor: 0%" << std::endl; - } else { - std::cout << "Mean bias of tensor: " << meanError * 100 << "%" << std::endl; - } - return meanError; - } - int InitDumpConfigFromJson(std::string path); - - private: - // call GenerateInputData or ReadInputFile to init inputTensors - int LoadInput(); - void CheckSum(MSTensor *tensor, const std::string &node_type, int id, const std::string &in_out); + protected: // call GenerateRandomData to fill inputTensors - int GenerateInputData(); + int GenerateInputData() override; - int 
GenerateRandomData(mindspore::MSTensor *tensor); - - int ReadInputFile(); + int ReadInputFile() override; int LoadStepInput(size_t step); @@ -227,20 +66,19 @@ class MS_API NetTrain { void InitTrainCfg(const std::shared_ptr &train_cfg); int CreateAndRunNetwork(const std::string &filename, const std::string &bb_filename, bool is_train, int epochs, - bool check_accuracy = true); + bool check_accuracy = true) override; int CreateAndRunNetworkForInference(const std::string &filename, const std::shared_ptr &context); int CreateAndRunNetworkForTrain(const std::string &filename, const std::string &bb_filename, const std::shared_ptr &context, const std::shared_ptr &train_cfg, int epochs); - int InitCallbackParameter(); - int InitDumpTensorDataCallbackParameter(); + int InitDumpTensorDataCallbackParameter() override; - int InitTimeProfilingCallbackParameter(); + int InitTimeProfilingCallbackParameter() override; - int PrintResult(const std::vector &title, const std::map> &result); + int PrintResult(const std::vector &title, const std::map> &result) override; template void PrintInputData(mindspore::MSTensor *input) { @@ -256,39 +94,11 @@ class MS_API NetTrain { std::cout << std::endl; } - template - std::vector ConverterToInt64Vector(const std::vector &srcDims) { - std::vector dims; - for (auto shape : srcDims) { - dims.push_back(static_cast(shape)); - } - return dims; - } - int MarkPerformance(); - int MarkAccuracy(bool enforce_accuracy = true); - int CompareOutput(); - int SaveModels(); - int CheckExecutionOfSavedModels(); - void TensorNan(const float *data, int size) { - for (int i = 0; i < size; i++) { - if (std::isnan(data[i])) { - std::cout << "nan value of index=" << i << ", " << data[i] << std::endl; - break; - } - } - } -#ifdef ENABLE_FP16 - void TensorNan(float16_t *data, int size) { - for (int i = 0; i < size; i++) { - if (MS_ISNAN_FP16(data[i]) || std::isinf(data[i])) { - std::cout << "nan or inf value of index=" << i << ", " << data[i] << std::endl; - break; - } - } - } -#endif - NetTrainFlags *flags_{nullptr}; - static std::function nr_cb_; + int MarkPerformance() override; + int MarkAccuracy(bool enforce_accuracy = true) override; + int CompareOutput() override; + int SaveModels() override; + // callback parameters uint64_t op_begin_ = 0; int op_call_times_total_ = 0; @@ -301,13 +111,6 @@ class MS_API NetTrain { mindspore::MSKernelCallBack before_call_back_{nullptr}; mindspore::MSKernelCallBack after_call_back_{nullptr}; - nlohmann::json dump_cfg_json_; - std::string dump_file_output_dir_; - std::vector> inputs_buf_; - std::vector inputs_size_; - size_t batch_num_ = 0; }; - -int MS_API RunNetTrain(int argc, const char **argv); } // namespace mindspore::lite #endif // MINDSPORE_LITE_TOOLS_BENCHMARK_TRAIN_NET_TRAIN_H_ diff --git a/mindspore/lite/tools/benchmark_train/net_train_base.cc b/mindspore/lite/tools/benchmark_train/net_train_base.cc new file mode 100644 index 00000000..8d3c75de --- /dev/null +++ b/mindspore/lite/tools/benchmark_train/net_train_base.cc @@ -0,0 +1,410 @@ +/** + * Copyright 2020-2023 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
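The net_train changes above turn NetTrain into a thin subclass of a new NetTrainBase, and the CMake change earlier adds net_train_base.cc, net_train_c_api.cc and run_net_train.cc alongside it. A minimal sketch of the extension seam, using a hypothetical VendorNetTrain name and only the virtual hooks visible in net_train.h above (hooks whose signatures are elided there are omitted here):

#include "tools/benchmark_train/net_train_base.h"

namespace mindspore::lite {
class VendorNetTrain : public NetTrainBase {  // hypothetical subclass, for illustration only
 public:
  explicit VendorNetTrain(NetTrainFlags *flags) : NetTrainBase(flags) {}
  ~VendorNetTrain() override = default;

 protected:
  // Input preparation hooks, called from NetTrainBase::LoadInput().
  int GenerateInputData() override;
  int ReadInputFile() override;
  // Session construction and execution, called from NetTrainBase::RunNetTrain().
  int CreateAndRunNetwork(const std::string &filename, const std::string &bb_filename, bool is_train, int epochs,
                          bool check_accuracy = true) override;
  // Callback and reporting hooks.
  int InitDumpTensorDataCallbackParameter() override;
  int InitTimeProfilingCallbackParameter() override;
  int MarkPerformance() override;
  int MarkAccuracy(bool enforce_accuracy = true) override;
  int CompareOutput() override;
  int SaveModels() override;
};
}  // namespace mindspore::lite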
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "tools/benchmark_train/net_train_base.h" +#define __STDC_FORMAT_MACROS +#undef __STDC_FORMAT_MACROS +#include +#include +#ifdef ENABLE_NEON +#include +#endif +#include "src/common/common.h" +#include "include/api/serialization.h" + +namespace mindspore { +namespace lite { +const std::unordered_map kTypeIdMap{ + {kNumberTypeFloat16, "Float16"}, {kNumberTypeFloat, "Float32"}, {kNumberTypeFloat32, "Float32"}, + {kNumberTypeInt8, "Int8"}, {kNumberTypeInt16, "Int16"}, {kNumberTypeInt, "Int32"}, + {kNumberTypeInt32, "Int32"}, {kNumberTypeUInt8, "UInt8"}, {kNumberTypeUInt16, "UInt16"}, + {kNumberTypeUInt, "UInt32"}, {kNumberTypeUInt32, "UInt32"}, {kObjectTypeString, "String"}, + {kNumberTypeBool, "Bool"}, {kObjectTypeTensorType, "Tensor"}}; + +const std::unordered_map kTensorFormatMap{ + {mindspore::NCHW, "NCHW"}, {mindspore::NHWC, "NHWC"}, {mindspore::NHWC4, "NHWC4"}, {mindspore::HWKC, "HWKC"}, + {mindspore::HWCK, "HWCK"}, {mindspore::KCHW, "KCHW"}, {mindspore::CKHW, "CKHW"}, {mindspore::KHWC, "KHWC"}, + {mindspore::CHWK, "CHWK"}, {mindspore::HW, "HW"}, {mindspore::HW4, "HW4"}, {mindspore::NC, "NC"}, + {mindspore::NC4, "NC4"}, {mindspore::NC4HW4, "NC4HW4"}, {mindspore::NCDHW, "NCDHW"}}; + +std::function NetTrainBase::nr_cb_ = nullptr; + +int NetTrainBase::SetNr(std::function param) { + nr_cb_ = param; + return 0; +} + +float *NetTrainBase::ReadFileBuf(const std::string file, size_t *size) { + if (file.empty()) { + MS_LOG(ERROR) << "file is nullptr"; + return nullptr; + } + MS_ASSERT(size != nullptr); + std::string real_path = RealPath(file.c_str()); + std::ifstream ifs(real_path); + if (!ifs.good()) { + MS_LOG(ERROR) << "file: " << real_path << " is not exist"; + return nullptr; + } + + if (!ifs.is_open()) { + MS_LOG(ERROR) << "file: " << real_path << " open failed"; + return nullptr; + } + + ifs.seekg(0, std::ios::end); + *size = ifs.tellg(); + std::unique_ptr buf = std::make_unique(*size / sizeof(float) + 1); + if (buf == nullptr) { + MS_LOG(ERROR) << "malloc buf failed, file: " << real_path; + ifs.close(); + return nullptr; + } + + ifs.seekg(0, std::ios::beg); + ifs.read(reinterpret_cast(buf.get()), *size); + ifs.close(); + + return buf.release(); +} + +int NetTrainBase::GenerateRandomData(mindspore::MSTensor *tensor) { + auto input_data = tensor->MutableData(); + if (input_data == nullptr) { + MS_LOG(ERROR) << "MallocData for inTensor failed"; + return RET_ERROR; + } + auto tensor_byte_size = tensor->DataSize(); + char *casted_data = static_cast(input_data); + for (size_t i = 0; i < tensor_byte_size; i++) { + casted_data[i] = + (tensor->DataType() == mindspore::DataType::kNumberTypeFloat32) ? 
static_cast(i) : static_cast(0); + } + return RET_OK; +} + +int NetTrainBase::LoadInput() { + inputs_buf_.clear(); + inputs_size_.clear(); + batch_num_ = 0; + if (flags_->in_data_file_.empty()) { + auto status = GenerateInputData(); + if (status != RET_OK) { + std::cerr << "Generate input data error " << status << std::endl; + MS_LOG(ERROR) << "Generate input data error " << status; + return status; + } + } else { + auto status = ReadInputFile(); + if (status != RET_OK) { + std::cerr << "Read Input File error, " << status << std::endl; + MS_LOG(ERROR) << "Read Input File error, " << status; + return status; + } + } + return RET_OK; +} + +int NetTrainBase::RunNetTrain() { + auto file_name = flags_->model_file_.substr(flags_->model_file_.find_last_of(DELIM_SLASH) + 1); + bool is_train = (file_name.find("train") != std::string::npos) || !flags_->bb_model_file_.empty(); + auto status = CreateAndRunNetwork(flags_->model_file_, flags_->bb_model_file_, is_train, flags_->epochs_); + if (status != RET_OK) { + MS_LOG(ERROR) << "CreateAndRunNetwork failed for model " << flags_->model_file_ << ". Status is " << status; + std::cout << "CreateAndRunNetwork failed for model " << flags_->model_file_ << ". Status is " << status + << std::endl; + return status; + } + + status = CheckExecutionOfSavedModels(); // re-initialize sessions according to flags + if (status != RET_OK) { + MS_LOG(ERROR) << "Run CheckExecute error: " << status; + std::cout << "Run CheckExecute error: " << status << std::endl; + return status; + } + return RET_OK; +} + +int NetTrainBase::CheckExecutionOfSavedModels() { + int status = RET_OK; + if (!flags_->export_file_.empty()) { + status = CreateAndRunNetwork(flags_->export_file_, flags_->bb_model_file_, true, 0); + if (status != RET_OK) { + MS_LOG(ERROR) << "Run Exported model " << flags_->export_file_ << " error: " << status; + std::cout << "Run Exported model " << flags_->export_file_ << " error: " << status << std::endl; + return status; + } + if (flags_->bb_model_file_.empty()) { + status = CreateAndRunNetwork(flags_->export_file_ + "_qt", "", true, 0, false); + if (status != RET_OK) { + MS_LOG(ERROR) << "Run Exported model " << flags_->export_file_ << "_qt.ms error: " << status; + std::cout << "Run Exported model " << flags_->export_file_ << "_qt.ms error: " << status << std::endl; + return status; + } + } + } + if (!flags_->inference_file_.empty()) { + status = CreateAndRunNetwork(flags_->inference_file_, "", false, 0); + if (status != RET_OK) { + MS_LOG(ERROR) << "Running saved model " << flags_->inference_file_ << ".ms error: " << status; + std::cout << "Running saved model " << flags_->inference_file_ << ".ms error: " << status << std::endl; + return status; + } + status = CreateAndRunNetwork(flags_->inference_file_ + "_qt", "", false, 0, false); + if (status != RET_OK) { + MS_LOG(ERROR) << "Running saved model " << flags_->inference_file_ << "_qt.ms error: " << status; + std::cout << "Running saved model " << flags_->inference_file_ << "_qt.ms error: " << status << std::endl; + return status; + } + } + return status; +} + +void NetTrainBase::CheckSum(MSTensor *tensor, const std::string &node_type, int id, const std::string &in_out) { + if (tensor == nullptr) { + MS_LOG(ERROR) << "input tensor is nullptr."; + return; + } + int tensor_size = tensor->ElementNum(); + void *data = tensor->MutableData(); + auto *fdata = reinterpret_cast(tensor->MutableData()); + auto type = tensor->DataType(); + std::cout << node_type << " " << in_out << id << " shape=" << tensor->Shape() << " 
sum="; + switch (type) { + case mindspore::DataType::kNumberTypeFloat32: + TensorNan(reinterpret_cast(data), tensor_size); + std::cout << TensorSum(data, tensor_size) << std::endl; + std::cout << "tensor name: " << tensor->Name() << std::endl; + std::cout << "data: "; + for (int i = 0; i <= kPrintOffset && i < tensor_size; i++) { + std::cout << static_cast(fdata[i]) << ", "; + } + std::cout << std::endl; + break; + case mindspore::DataType::kNumberTypeInt32: + std::cout << TensorSum(data, tensor_size) << std::endl; + break; +#ifdef ENABLE_FP16 + case mindspore::DataType::kNumberTypeFloat16: + std::cout << TensorSum(data, tensor_size) << std::endl; + TensorNan(reinterpret_cast(data), tensor_size); + break; +#endif + default: + std::cout << "unsupported type:" << static_cast(type) << std::endl; + break; + } +} + +std::string NetTrainBase::GenerateOutputFileName(mindspore::MSTensor *tensor, const std::string &op_name, + const std::string &file_type, const size_t &idx) { + std::string file_name = op_name; + auto pos = file_name.find_first_of('/'); + while (pos != std::string::npos) { + file_name.replace(pos, 1, "."); + pos = file_name.find_first_of('/'); + } + file_name += "_" + file_type + "_" + std::to_string(idx) + "_shape_"; + for (const auto &dim : tensor->Shape()) { + file_name += std::to_string(dim) + "_"; + } + if (kTypeIdMap.find(static_cast(tensor->DataType())) != kTypeIdMap.end()) { + file_name += kTypeIdMap.at(static_cast(tensor->DataType())); + } + auto tensor_format = tensor->format(); + if (kTensorFormatMap.find(tensor_format) != kTensorFormatMap.end()) { + file_name += "_" + kTensorFormatMap.at(tensor_format) + ".bin"; + } + + file_name += ".bin"; + return file_name; +} + +int NetTrainBase::InitCallbackParameter() { + int ret = RET_OK; + if (flags_->dump_tensor_data_) { + ret = InitDumpTensorDataCallbackParameter(); + } else if (flags_->time_profiling_) { + ret = InitTimeProfilingCallbackParameter(); + } + return ret; +} + +void NetTrainFlags::InitResizeDimsList() { + std::string content = this->resize_dims_in_; + if (content.empty()) { + return; + } + std::vector shape; + auto shape_strs = StrSplit(content, std::string(DELIM_COLON)); + for (const auto &shape_str : shape_strs) { + shape.clear(); + auto dim_strs = StrSplit(shape_str, std::string(DELIM_COMMA)); + std::cout << "Resize Dims: "; + for (const auto &dim_str : dim_strs) { + std::cout << dim_str << " "; + shape.emplace_back(static_cast(std::stoi(dim_str))); + } + std::cout << std::endl; + this->resize_dims_.emplace_back(shape); + } +} + +int NetTrainBase::Init() { + if (this->flags_ == nullptr) { + return 1; + } + MS_LOG(INFO) << "ModelPath = " << this->flags_->model_file_; + MS_LOG(INFO) << "InDataPath = " << this->flags_->in_data_file_; + MS_LOG(INFO) << "InDataType = " << this->flags_->in_data_type_in_; + MS_LOG(INFO) << "Epochs = " << this->flags_->epochs_; + MS_LOG(INFO) << "AccuracyThreshold = " << this->flags_->accuracy_threshold_; + MS_LOG(INFO) << "WarmUpLoopCount = " << this->flags_->warm_up_loop_count_; + MS_LOG(INFO) << "NumThreads = " << this->flags_->num_threads_; + MS_LOG(INFO) << "expectedDataFile = " << this->flags_->data_file_; + MS_LOG(INFO) << "exportDataFile = " << this->flags_->export_file_; + MS_LOG(INFO) << "enableFp16 = " << this->flags_->enable_fp16_; + MS_LOG(INFO) << "virtualBatch = " << this->flags_->virtual_batch_; + + if (this->flags_->epochs_ < 0) { + MS_LOG(ERROR) << "epochs:" << this->flags_->epochs_ << " must be equal/greater than 0"; + std::cerr << "epochs:" << this->flags_->epochs_ 
<< " must be equal/greater than 0" << std::endl; + return RET_ERROR; + } + + if (this->flags_->num_threads_ < 1) { + MS_LOG(ERROR) << "numThreads:" << this->flags_->num_threads_ << " must be greater than 0"; + std::cerr << "numThreads:" << this->flags_->num_threads_ << " must be greater than 0" << std::endl; + return RET_ERROR; + } + + this->flags_->in_data_type_ = this->flags_->in_data_type_in_ == "img" ? kImage : kBinary; + + if (flags_->in_data_file_.empty() && !flags_->data_file_.empty()) { + MS_LOG(ERROR) << "expectedDataFile not supported in case that inDataFile is not provided"; + std::cerr << "expectedDataFile is not supported in case that inDataFile is not provided" << std::endl; + return RET_ERROR; + } + + if (flags_->in_data_file_.empty() && !flags_->export_file_.empty()) { + MS_LOG(ERROR) << "exportDataFile not supported in case that inDataFile is not provided"; + std::cerr << "exportDataFile is not supported in case that inDataFile is not provided" << std::endl; + return RET_ERROR; + } + + if (flags_->model_file_.empty()) { + MS_LOG(ERROR) << "modelPath is required"; + std::cerr << "modelPath is required" << std::endl; + return 1; + } + + // get dump data output path + auto dump_cfg_path = std::getenv(dump::kConfigPath); + if (dump_cfg_path != nullptr) { + flags_->dump_tensor_data_ = true; + if (InitDumpConfigFromJson(dump_cfg_path) != RET_OK) { + MS_LOG(ERROR) << "parse dump config file failed."; + return RET_ERROR; + } + } else { + MS_LOG(INFO) << "No MINDSPORE_DUMP_CONFIG in env, don't need to dump data"; + } + + auto status = InitCallbackParameter(); + if (status != RET_OK) { + MS_LOG(ERROR) << "Init callback Parameter failed."; + std::cerr << "Init callback Parameter failed." << std::endl; + return RET_ERROR; + } + + flags_->InitResizeDimsList(); + if (!flags_->resize_dims_.empty() && !flags_->input_data_list_.empty() && + flags_->resize_dims_.size() != flags_->input_data_list_.size()) { + MS_LOG(ERROR) << "Size of input resizeDims should be equal to size of input inDataPath"; + std::cerr << "Size of input resizeDims should be equal to size of input inDataPath" << std::endl; + return RET_ERROR; + } + return RET_OK; +} + +int NetTrainBase::InitDumpConfigFromJson(std::string path) { + auto real_path = RealPath(path.c_str()); + std::ifstream ifs(real_path); + if (!ifs.good()) { + MS_LOG(ERROR) << "file: " << real_path << " is not exist"; + return RET_ERROR; + } + if (!ifs.is_open()) { + MS_LOG(ERROR) << "file: " << real_path << " open failed"; + return RET_ERROR; + } + + try { + dump_cfg_json_ = nlohmann::json::parse(ifs); + } catch (const nlohmann::json::parse_error &error) { + MS_LOG(ERROR) << "parse json file failed, please check your file."; + return RET_ERROR; + } + if (dump_cfg_json_[dump::kSettings] == nullptr) { + MS_LOG(ERROR) << "\"common_dump_settings\" is required."; + return RET_ERROR; + } + if (dump_cfg_json_[dump::kSettings][dump::kMode] == nullptr) { + MS_LOG(ERROR) << "\"dump_mode\" is required."; + return RET_ERROR; + } + if (dump_cfg_json_[dump::kSettings][dump::kPath] == nullptr) { + MS_LOG(ERROR) << "\"path\" is required."; + return RET_ERROR; + } + if (dump_cfg_json_[dump::kSettings][dump::kNetName] == nullptr) { + dump_cfg_json_[dump::kSettings][dump::kNetName] = "default"; + } + if (dump_cfg_json_[dump::kSettings][dump::kInputOutput] == nullptr) { + dump_cfg_json_[dump::kSettings][dump::kInputOutput] = 0; + } + if (dump_cfg_json_[dump::kSettings][dump::kKernels] != nullptr && + !dump_cfg_json_[dump::kSettings][dump::kKernels].empty()) { + if 
(dump_cfg_json_[dump::kSettings][dump::kMode] == 0) { + MS_LOG(ERROR) << R"("dump_mode" should be 1 when "kernels" isn't empty.)"; + return RET_ERROR; + } + } + + auto abs_path = dump_cfg_json_[dump::kSettings][dump::kPath].get(); + auto net_name = dump_cfg_json_[dump::kSettings][dump::kNetName].get(); + if (abs_path.back() == '\\' || abs_path.back() == '/') { + dump_file_output_dir_ = abs_path + net_name; + } else { +#ifdef _WIN32 + dump_file_output_dir_ = abs_path + "\\" + net_name; +#else + dump_file_output_dir_ = abs_path + "/" + net_name; +#endif + } + + auto status = CreateOutputDir(&dump_file_output_dir_); + if (status != RET_OK) { + MS_LOG(ERROR) << "create data output directory failed."; + return RET_ERROR; + } + return RET_OK; +} + +NetTrainBase:: ~NetTrainBase() { +} +} // namespace lite +} // namespace mindspore diff --git a/mindspore/lite/tools/benchmark_train/net_train_base.h b/mindspore/lite/tools/benchmark_train/net_train_base.h new file mode 100644 index 00000000..e3d5f39a --- /dev/null +++ b/mindspore/lite/tools/benchmark_train/net_train_base.h @@ -0,0 +1,288 @@ +/** + * Copyright 2020-2023 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_LITE_TOOLS_BENCHMARK_TRAIN_NET_TRAIN_BASE_H_ +#define MINDSPORE_LITE_TOOLS_BENCHMARK_TRAIN_NET_TRAIN_BASE_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "include/api/model.h" +#include "include/api/types.h" +#include "include/api/context.h" +#include "include/api/cfg.h" + +#ifdef ENABLE_FP16 +#include +#endif +#include "tools/common/flag_parser.h" +#include "src/common/file_utils.h" +#include "src/common/utils.h" + +#ifdef ENABLE_FP16 +static __attribute__((always_inline)) inline bool MS_ISNAN_FP16(float16_t var) { + volatile float16_t d = var; + return d != d; +} +#endif + +namespace mindspore::lite { +enum MS_API DataType { kImage = 0, kBinary = 1 }; + +constexpr float relativeTolerance = 1e-5; +constexpr float absoluteTolerance = 1e-8; +extern const std::unordered_map kTypeIdMap; +extern const std::unordered_map kTensorFormatMap; + +constexpr const char *DELIM_SLASH = "/"; +constexpr const char *DELIM_COLON = ":"; +constexpr const char *DELIM_COMMA = ","; + +constexpr int RET_TOO_BIG = -9; +constexpr int kFieldsToPrint = 5; +constexpr int kPrintOffset = 4; +constexpr int kDumpInputsAndOutputs = 0; +constexpr int kDumpOutputs = 2; +constexpr int kTHOUSAND = 1000; + +namespace dump { +constexpr auto kConfigPath = "MINDSPORE_DUMP_CONFIG"; +constexpr auto kSettings = "common_dump_settings"; +constexpr auto kMode = "dump_mode"; +constexpr auto kPath = "path"; +constexpr auto kNetName = "net_name"; +constexpr auto kInputOutput = "input_output"; +constexpr auto kKernels = "kernels"; +} // namespace dump + +template +float TensorSum(const void *data, int size) { + const T *typed_data = reinterpret_cast(data); + float sum = 0.f; + for (int i = 0; i < size; i++) { + sum += 
static_cast(typed_data[i]); + } + return sum; +} + +class MS_API NetTrainFlags : public virtual FlagParser { + public: + NetTrainFlags() { + // common + AddFlag(&NetTrainFlags::model_file_, "modelFile", "Input model file", ""); + AddFlag(&NetTrainFlags::bb_model_file_, "bbModelFile", "Backboine model for transfer session", ""); + AddFlag(&NetTrainFlags::in_data_file_, "inDataFile", "Input data file, if not set, use random input", ""); + // MarkPerformance + AddFlag(&NetTrainFlags::warm_up_loop_count_, "warmUpLoopCount", "Run warm up loop", 0); + AddFlag(&NetTrainFlags::time_profiling_, "timeProfiling", "Run time profiling", false); + AddFlag(&NetTrainFlags::epochs_, "epochs", "Number of training epochs to run", 1); + AddFlag(&NetTrainFlags::num_threads_, "numThreads", "Run threads number", 1); + // MarkAccuracy + AddFlag(&NetTrainFlags::data_file_, "expectedDataFile", "Expected results data file path", ""); + AddFlag(&NetTrainFlags::export_file_, "exportFile", "MS File to export trained model into", ""); + AddFlag(&NetTrainFlags::accuracy_threshold_, "accuracyThreshold", "Threshold of accuracy", 0.5); + AddFlag(&NetTrainFlags::layer_checksum_, "layerCheckSum", "layer output checksum print (debug)", false); + AddFlag(&NetTrainFlags::enable_fp16_, "enableFp16", "Enable float16", false); + AddFlag(&NetTrainFlags::loss_name_, "lossName", "loss layer name", ""); + AddFlag(&NetTrainFlags::inference_file_, "inferenceFile", "MS file to export inference model", ""); + AddFlag(&NetTrainFlags::virtual_batch_, "virtualBatch", "use virtual batch", false); + AddFlag(&NetTrainFlags::resize_dims_in_, "inputShapes", + "Shape of input data, the format should be NHWC. e.g. 1,32,32,32:1,1,32,32,1", ""); + AddFlag(&NetTrainFlags::unified_api_, "unifiedApi", "do unified api test", false); + } + + ~NetTrainFlags() override = default; + void InitResizeDimsList(); + + public: + // common + std::string model_file_; + std::string in_data_file_; + std::string bb_model_file_; + std::vector input_data_list_; + DataType in_data_type_; + std::string in_data_type_in_ = "bin"; + int cpu_bind_mode_ = 1; + bool enable_fp16_ = false; + bool virtual_batch_ = false; + // MarkPerformance + int num_threads_ = 1; + int warm_up_loop_count_ = 0; + bool time_profiling_; + int epochs_ = 1; + // MarkAccuracy + std::string data_file_; + std::string data_type_ = "FLOAT"; + float accuracy_threshold_; + // Resize + std::string export_file_ = ""; + std::string resize_dims_in_ = ""; + bool layer_checksum_ = false; + std::vector> resize_dims_; + std::string loss_name_ = ""; + std::string inference_file_ = ""; + bool unified_api_ = false; + bool dump_tensor_data_ = false; +}; + +class MS_API NetTrainBase { + public: + explicit NetTrainBase(NetTrainFlags *flags) : flags_(flags) {} + virtual ~NetTrainBase(); + + int Init(); + int RunNetTrain(); + static float *ReadFileBuf(const std::string file, size_t *size); + static int SetNr(std::function param); + static int RunNr(NetTrainFlags *flags) { + if (nr_cb_ != nullptr) { + return nr_cb_(flags); + } + MS_LOG(WARNING) << "unified api was not tested"; + std::cout << "unified api was not tested"; + return RET_OK; + } + // tensorData need to be converter first + template + static float CompareData(const float *refOutput, int size, const T *msTensorData) { + size_t errorCount = 0; + float meanError = 0; + std::cout << "Out tensor size is: " << size << std::endl; + std::cout << "Data of model output: "; + for (int j = 0; j < std::min(50, size); j++) { + std::cout << static_cast(msTensorData[j]) << " "; + 
} + std::cout << std::endl; + std::cout << "Data of Ref output : "; + for (int j = 0; j < std::min(50, size); j++) { + std::cout << refOutput[j] << " "; + } + std::cout << std::endl; + for (int j = 0; j < size; j++) { + if (std::isnan(msTensorData[j]) || std::isinf(msTensorData[j])) { + std::cerr << "Output tensor has nan or inf data, compare fail" << std::endl; + MS_LOG(ERROR) << "Output tensor has nan or inf data, compare fail"; + return RET_ERROR; + } + + auto tolerance = absoluteTolerance + relativeTolerance * fabs(refOutput[j]); + auto absoluteError = std::fabs(static_cast(msTensorData[j]) - refOutput[j]); + if (absoluteError > tolerance) { + if (fabs(refOutput[j]) == 0) { + if (absoluteError > 1e-5) { + meanError += absoluteError; + errorCount++; + } else { + continue; + } + } else { + // just assume that atol = rtol + meanError += absoluteError / (fabs(refOutput[j]) + FLT_MIN); + errorCount++; + } + } + } + std::cout << std::endl; + if (meanError > 0.0f) { + meanError /= errorCount; + } + + if (meanError <= 0.0000001) { + std::cout << "Mean bias of tensor: 0%" << std::endl; + } else { + std::cout << "Mean bias of tensor: " << meanError * 100 << "%" << std::endl; + } + return meanError; + } + int InitDumpConfigFromJson(std::string path); + + protected: + // call GenerateInputData or ReadInputFile to init inputTensors + int LoadInput(); + void CheckSum(MSTensor *tensor, const std::string &node_type, int id, const std::string &in_out); + // call GenerateRandomData to fill inputTensors + virtual int GenerateInputData() = 0; + + int GenerateRandomData(mindspore::MSTensor *tensor); + + std::string GenerateOutputFileName(mindspore::MSTensor *tensor, const std::string &op_name, + const std::string &file_type, const size_t &idx); + virtual int ReadInputFile() = 0; + + virtual int CreateAndRunNetwork(const std::string &filename, const std::string &bb_filename, bool is_train, int epochs, + bool check_accuracy = true) = 0; + + int InitCallbackParameter(); + + virtual int InitDumpTensorDataCallbackParameter() = 0; + + virtual int InitTimeProfilingCallbackParameter() = 0; + + virtual int PrintResult(const std::vector &title, const std::map> &result) = 0; + + template + std::vector ConverterToInt64Vector(const std::vector &srcDims) { + std::vector dims; + for (auto shape : srcDims) { + dims.push_back(static_cast(shape)); + } + return dims; + } + virtual int MarkPerformance() = 0; + virtual int MarkAccuracy(bool enforce_accuracy = true) = 0; + virtual int CompareOutput() = 0; + virtual int SaveModels() = 0; + int CheckExecutionOfSavedModels(); + void TensorNan(const float *data, int size) { + for (int i = 0; i < size; i++) { + if (std::isnan(data[i])) { + std::cout << "nan value of index=" << i << ", " << data[i] << std::endl; + break; + } + } + } +#ifdef ENABLE_FP16 + void TensorNan(float16_t *data, int size) { + for (int i = 0; i < size; i++) { + if (MS_ISNAN_FP16(data[i]) || std::isinf(data[i])) { + std::cout << "nan or inf value of index=" << i << ", " << data[i] << std::endl; + break; + } + } + } +#endif + NetTrainFlags *flags_{nullptr}; + static std::function nr_cb_; + + nlohmann::json dump_cfg_json_; + std::string dump_file_output_dir_; + std::vector> inputs_buf_; + std::vector inputs_size_; + size_t batch_num_ = 0; +}; +} // namespace mindspore::lite +#endif // MINDSPORE_LITE_TOOLS_BENCHMARK_TRAIN_NET_TRAIN_BASE_H_ diff --git a/mindspore/lite/tools/benchmark_train/net_train_c_api.cc b/mindspore/lite/tools/benchmark_train/net_train_c_api.cc new file mode 100644 index 00000000..4dcf3af6 --- 
/dev/null +++ b/mindspore/lite/tools/benchmark_train/net_train_c_api.cc @@ -0,0 +1,659 @@ +/** + * Copyright 2023-2023 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "net_train_c_api.h" +#include "securec/include/securec.h" + +namespace mindspore { +namespace lite { +uint64_t g_op_begin_ = 0; +int g_op_call_times_total_ = 0; +float g_op_cost_total_ = 0.0f; + +int NetTrainCApi::GenerateInputData() { + for (size_t i = 0; i < ms_inputs_for_api_.handle_num; i++) { + OH_AI_TensorHandle tensor = ms_inputs_for_api_.handle_list[i]; + auto data_type = OH_AI_TensorGetDataType(tensor); + if (data_type == OH_AI_DATATYPE_OBJECTTYPE_STRING) { + MS_LOG(ERROR) << "Unsupported OH_AI_DATATYPE_OBJECTTYPE_STRING"; + return RET_ERROR; + } else { + (void)GenerateRandomData(static_cast(tensor)); + } + } + return RET_OK; +} + +int NetTrainCApi::SaveModels() { + if (!flags_->export_file_.empty()) { + if (flags_->bb_model_file_.empty()) { + auto status = OH_AI_ExportModel(ms_model_, OH_AI_MODELTYPE_MINDIR, (flags_->export_file_ + "_qt").c_str(), OH_AI_WEIGHT_QUANT, false, + nullptr, 0); + if (status != OH_AI_STATUS_SUCCESS) { + MS_LOG(ERROR) << "Export quantized model error " << flags_->export_file_ + "_qt"; + std::cout << "Export quantized model error " << flags_->export_file_ + "_qt" << std::endl; + return RET_ERROR; + } + } + auto status = OH_AI_ExportModel(ms_model_, OH_AI_MODELTYPE_MINDIR, (flags_->export_file_).c_str(), OH_AI_NO_QUANT, false, + nullptr, 0); + + if (status != OH_AI_STATUS_SUCCESS) { + MS_LOG(ERROR) << "Export non quantized model error " << flags_->export_file_; + std::cout << "Export non quantized model error " << flags_->export_file_ << std::endl; + return RET_ERROR; + } + } + if (!flags_->inference_file_.empty()) { + auto status = OH_AI_ExportModel(ms_model_, OH_AI_MODELTYPE_MINDIR, (flags_->inference_file_ + "_qt").c_str(), OH_AI_WEIGHT_QUANT, true, + nullptr, 0); + if (status != OH_AI_STATUS_SUCCESS) { + MS_LOG(ERROR) << "Export quantized inference model error " << flags_->inference_file_ + "_qt"; + std::cout << "Export quantized inference model error " << flags_->inference_file_ + "_qt" << std::endl; + return RET_ERROR; + } + + auto tick = GetTimeUs(); + status = OH_AI_ExportModel(ms_model_, OH_AI_MODELTYPE_MINDIR, (flags_->inference_file_).c_str(), OH_AI_NO_QUANT, true, + nullptr, 0); + if (status != OH_AI_STATUS_SUCCESS) { + MS_LOG(ERROR) << "Export non quantized inference model error " << flags_->inference_file_; + std::cout << "Export non quantized inference model error " << flags_->inference_file_ << std::endl; + return RET_ERROR; + } + std::cout << "ExportInference() execution time is " << GetTimeUs() - tick << "us\n"; + } + return RET_OK; +} + +int NetTrainCApi::LoadStepInput(size_t step) { + if (step >= batch_num_) { + auto cur_batch = step + 1; + MS_LOG(ERROR) << "Max input Batch is:" << batch_num_ << " but got batch :" << cur_batch; + return RET_ERROR; + } + for (size_t i = 0; i < ms_inputs_for_api_.handle_num; i++) { + 
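+      // For each input tensor, copy the step-th slice of the preloaded file buffer (inputs_buf_[i])
+      // into the tensor's own memory; each slice is exactly one tensor's worth of bytes, so the
+      // source offset into the buffer is step * OH_AI_TensorGetDataSize(tensor).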
OH_AI_TensorHandle cur_tensor = ms_inputs_for_api_.handle_list[i]; + MS_ASSERT(cur_tensor != nullptr); + auto tensor_data_size = OH_AI_TensorGetDataSize(cur_tensor); + auto input_data = OH_AI_TensorGetMutableData(cur_tensor); + MS_ASSERT(input_data != nullptr); + memcpy_s(input_data, tensor_data_size, inputs_buf_[i].get() + step * tensor_data_size, tensor_data_size); + } + return RET_OK; +} + +int NetTrainCApi::ReadInputFile() { + if (this->flags_->in_data_type_ == lite::kImage) { + MS_LOG(ERROR) << "Unsupported image input"; + return RET_ERROR; + } else { + for (size_t i = 0; i < ms_inputs_for_api_.handle_num; i++) { + OH_AI_TensorHandle tensor = ms_inputs_for_api_.handle_list[i]; + MS_ASSERT(tensor != nullptr); + size_t size; + std::string file_name = flags_->in_data_file_ + std::to_string(i + 1) + ".bin"; + auto bin_buf = lite::ReadFile(file_name.c_str(), &size); + if (bin_buf == nullptr) { + MS_LOG(ERROR) << "ReadFile failed"; + return RET_ERROR; + } + auto tensor_data_size = OH_AI_TensorGetDataSize(tensor); + MS_ASSERT(tensor_data_size != 0); + if (size == 0 || size % tensor_data_size != 0 || (batch_num_ != 0 && size / tensor_data_size != batch_num_)) { + std::cerr << "Input binary file size error, required :N * " << tensor_data_size << ", in fact: " << size + << " ,file_name: " << file_name.c_str() << std::endl; + MS_LOG(ERROR) << "Input binary file size error, required: N * " << tensor_data_size << ", in fact: " << size + << " ,file_name: " << file_name.c_str(); + delete bin_buf; + return RET_ERROR; + } + inputs_buf_.emplace_back(bin_buf); + inputs_size_.emplace_back(size); + batch_num_ = size / tensor_data_size; + } + } + return RET_OK; +} + +int NetTrainCApi::InitDumpTensorDataCallbackParameter() { + MS_LOG(ERROR) << "Unsupported feature."; + return RET_ERROR; +} + +int NetTrainCApi::InitTimeProfilingCallbackParameter() { + before_call_back_ = TimeProfilingBeforeCallback; + after_call_back_ = TimeProfilingAfterCallback; + return RET_OK; +} + +int NetTrainCApi::InitMSContext() { + context_ = OH_AI_ContextCreate(); + if (context_ == nullptr) { + MS_LOG(INFO) << "OH_AI_ContextCreate failed"; + return RET_ERROR; + } + OH_AI_ContextSetThreadNum(context_, flags_->num_threads_); + OH_AI_ContextSetThreadAffinityMode(context_, flags_->cpu_bind_mode_); + + OH_AI_DeviceInfoHandle cpu_device_info = OH_AI_DeviceInfoCreate(OH_AI_DEVICETYPE_CPU); + OH_AI_DeviceInfoSetEnableFP16(cpu_device_info, flags_->enable_fp16_); + OH_AI_ContextAddDeviceInfo(context_, cpu_device_info); + return RET_OK; +} + +char **NetTrainCApi::TransStrVectorToCharArrays(const std::vector &s) { + char **char_arr = static_cast(malloc(s.size() * sizeof(char *))); + for (size_t i = 0; i < s.size(); i++) { + char_arr[i] = static_cast(malloc((s[i].size() + 1))); + strcpy(char_arr[i], s[i].c_str()); + } + return char_arr; +} + +std::vector NetTrainCApi::TransCharArraysToStrVector(char **c, const size_t &num) { + std::vector str; + for (size_t i = 0; i < num; i++) { + str.push_back(std::string(c[i])); + } + return str; +} + +void NetTrainCApi::InitTrainCfg() { + if (flags_->loss_name_.empty()) { + return; + } + + std::string delimiter = ","; + size_t pos = 0; + std::string token; + train_cfg_ = OH_AI_TrainCfgCreate(); + size_t num = 0; + std::vector train_cfg_loss_name; + OH_AI_TrainCfgSetLossName(train_cfg_, nullptr, train_cfg_loss_name.size()); + while ((pos = flags_->loss_name_.find(delimiter)) != std::string::npos) { + token = flags_->loss_name_.substr(0, pos); + flags_->loss_name_.erase(0, pos + delimiter.length()); // change 
to delim without deletion + char **name = OH_AI_TrainCfgGetLossName(train_cfg_, &num); + train_cfg_loss_name = TransCharArraysToStrVector(name, num); + train_cfg_loss_name.push_back(token); + char **loss_name = TransStrVectorToCharArrays(train_cfg_loss_name); + OH_AI_TrainCfgSetLossName(train_cfg_, const_cast(loss_name), train_cfg_loss_name.size()); + for (size_t i = 0; i < train_cfg_loss_name.size(); i++) { + free(loss_name[i]); + } + free(loss_name); + for (size_t i = 0; i < num; i++) { + free(name[i]); + } + free(name); + } + if (!(flags_->loss_name_.empty())) { + char **name = OH_AI_TrainCfgGetLossName(train_cfg_, &num); + train_cfg_loss_name = TransCharArraysToStrVector(name, num); + train_cfg_loss_name.push_back(flags_->loss_name_); + char **loss_name = TransStrVectorToCharArrays(train_cfg_loss_name); + OH_AI_TrainCfgSetLossName(train_cfg_, const_cast(loss_name), train_cfg_loss_name.size()); + for (size_t i = 0; i < train_cfg_loss_name.size(); i++) { + free(loss_name[i]); + } + free(loss_name); + for (size_t i = 0; i < num; i++) { + free(name[i]); + } + free(name); + } +} + +int NetTrainCApi::CreateAndRunNetworkForInference(const std::string &filename, + const OH_AI_ContextHandle &context) { + std::string model_name = filename.substr(filename.find_last_of(DELIM_SLASH) + 1); + std::string filenamems = filename; + if (filenamems.substr(filenamems.find_last_of('.') + 1) != "ms") { + filenamems = filenamems + ".ms"; + } + MS_LOG(INFO) << "start reading model file " << filenamems.c_str(); + std::cout << "start reading model file " << filenamems.c_str() << std::endl; + auto status = OH_AI_ModelBuildFromFile(ms_model_, filenamems.c_str(), + static_cast(mindspore::kMindIR), context); + if (status != OH_AI_STATUS_SUCCESS) { + MS_LOG(ERROR) << "ms model build failed. " << model_name; + return RET_ERROR; + } + return RET_OK; +} + +int NetTrainCApi::CreateAndRunNetworkForTrain(const std::string &filename, const std::string &bb_filename, + const OH_AI_ContextHandle &context, + const OH_AI_TrainCfgHandle &train_cfg, int epochs) { + std::string model_name = filename.substr(filename.find_last_of(DELIM_SLASH) + 1); + OH_AI_Status status; + if (!bb_filename.empty()) { + MS_LOG(ERROR) << "build transfer learning not supported. " << model_name; + return RET_ERROR; + } else { + MS_LOG(INFO) << "Build mindspore model from model file" << filename.c_str(); + std::cout << "Build mindspore model from model file" << filename.c_str() << std::endl; + status = OH_AI_TrainModelBuildFromFile(ms_model_, filename.c_str(), OH_AI_MODELTYPE_MINDIR, context, train_cfg); + if (status != OH_AI_STATUS_SUCCESS) { + MS_LOG(ERROR) << "build transfer learning failed. " << model_name; + return RET_ERROR; + } + } + if (epochs > 0) { + if (flags_->virtual_batch_) { + OH_AI_ModelSetupVirtualBatch(ms_model_, epochs, -1.0f, -1.0f); + } + status = OH_AI_ModelSetTrainMode(ms_model_, true); + if (status != OH_AI_STATUS_SUCCESS) { + MS_LOG(ERROR) << "set train mode failed. 
"; + return RET_ERROR; + } + } + return RET_OK; +} + +int NetTrainCApi::CompareOutput() { + std::cout << "================ Comparing Forward Output data ================" << std::endl; + float total_bias = 0; + int total_size = 0; + bool has_error = false; + auto output_tensors_handle = OH_AI_ModelGetOutputs(ms_model_); + + std::vector output_tensors; + for (size_t i = 0; i < output_tensors_handle.handle_num; i++) { + output_tensors.push_back(*static_cast(output_tensors_handle.handle_list[i])); + } + if (output_tensors.empty()) { + MS_LOG(ERROR) << "Cannot find output tensors, get model output failed"; + return RET_ERROR; + } + std::map ordered_outputs; + for (const auto &output_tensor : output_tensors) { + ordered_outputs.insert({output_tensor.Name(), output_tensor}); + } + int i = 1; + mindspore::MSTensor tensor; + for (auto &ordered_output : ordered_outputs) { + tensor = ordered_output.second; + std::cout << "output is tensor " << ordered_output.first << "\n"; + auto outputs = tensor.MutableData(); + size_t size; + std::string output_file = flags_->data_file_ + std::to_string(i) + ".bin"; + auto bin_buf = std::unique_ptr(ReadFileBuf(output_file.c_str(), &size)); + if (bin_buf == nullptr) { + MS_LOG(ERROR) << "ReadFile return nullptr"; + std::cout << "ReadFile return nullptr" << std::endl; + return RET_ERROR; + } + if (size != tensor.DataSize()) { + MS_LOG(ERROR) << "Output buffer and output file differ by size. Tensor size: " << tensor.DataSize() + << ", read size: " << size; + std::cout << "Output buffer and output file differ by size. Tensor size: " << tensor.DataSize() + << ", read size: " << size << std::endl; + return RET_ERROR; + } + float bias = CompareData(bin_buf.get(), tensor.ElementNum(), reinterpret_cast(outputs)); + if (bias >= 0) { + total_bias += bias; + total_size++; + } else { + has_error = true; + break; + } + i++; + } + + if (!has_error) { + float mean_bias; + if (total_size != 0) { + mean_bias = total_bias / total_size * 100; + } else { + mean_bias = 0; + } + + std::cout << "Mean bias of all nodes/tensors: " << mean_bias << "%" + << " threshold is:" << this->flags_->accuracy_threshold_ << std::endl; + std::cout << "=======================================================" << std::endl << std::endl; + + if (mean_bias > this->flags_->accuracy_threshold_) { + MS_LOG(INFO) << "Mean bias of all nodes/tensors is too big: " << mean_bias << "%"; + std::cout << "Mean bias of all nodes/tensors is too big: " << mean_bias << "%" << std::endl; + return RET_TOO_BIG; + } else { + return RET_OK; + } + } else { + MS_LOG(ERROR) << "Error in CompareData"; + std::cerr << "Error in CompareData" << std::endl; + std::cout << "=======================================================" << std::endl << std::endl; + return RET_ERROR; + } +} + +int NetTrainCApi::MarkPerformance() { + MS_LOG(INFO) << "Running train loops..."; + std::cout << "Running train loops..." 
<< std::endl; + uint64_t time_min = 0xFFFFFFFFFFFFFFFF; + uint64_t time_max = 0; + uint64_t time_avg = 0; + std::vector outputs; + + for (int i = 0; i < flags_->epochs_; i++) { + auto start = GetTimeUs(); + for (size_t step = 0; step < batch_num_; step++) { + MS_LOG(INFO) << "Run for epoch:" << i << " step:" << step; + auto ret = LoadStepInput(step); + if (ret != RET_OK) { + return ret; + } + auto status = OH_AI_RunStep(ms_model_, before_call_back_, after_call_back_); + if (status != OH_AI_STATUS_SUCCESS) { + MS_LOG(ERROR) << "Inference error " << status; + std::cerr << "Inference error " << status; + return RET_ERROR; + } + } + + auto end = GetTimeUs(); + auto time = end - start; + time_min = std::min(time_min, time); + time_max = std::max(time_max, time); + time_avg += time; + } + + if (flags_->time_profiling_) { + const std::vector per_op_name = {"opName", "avg(ms)", "percent", "calledTimes", "opTotalTime"}; + const std::vector per_op_type = {"opType", "avg(ms)", "percent", "calledTimes", "opTotalTime"}; + PrintResult(per_op_name, g_c_op_times_by_name_); + PrintResult(per_op_type, g_c_op_times_by_type_); + } + + if (flags_->epochs_ > 0) { + time_avg /= static_cast(flags_->epochs_); + MS_LOG(INFO) << "Model = " << flags_->model_file_.substr(flags_->model_file_.find_last_of(DELIM_SLASH) + 1).c_str() + << ", NumThreads = " << flags_->num_threads_ << ", MinRunTime = " << time_min / 1000.0f + << ", MaxRuntime = " << time_max / 1000.0f << ", AvgRunTime = " << time_avg / 1000.0f; + printf("Model = %s, NumThreads = %d, MinRunTime = %f ms, MaxRuntime = %f ms, AvgRunTime = %f ms\n", + flags_->model_file_.substr(flags_->model_file_.find_last_of(DELIM_SLASH) + 1).c_str(), flags_->num_threads_, + time_min / 1000.0f, time_max / 1000.0f, time_avg / 1000.0f); + } + return RET_OK; +} + +int NetTrainCApi::MarkAccuracy(bool enforce_accuracy) { + MS_LOG(INFO) << "MarkAccuracy"; + auto load_ret = LoadStepInput(0); + if (load_ret != RET_OK) { + return load_ret; + } + auto status = PrintInputData(); + if (status != RET_OK) { + MS_LOG(ERROR) << "PrintInputData failed, ret: " << status; + return status; + } + status = OH_AI_RunStep(ms_model_, before_call_back_, after_call_back_); + if (status != OH_AI_STATUS_SUCCESS) { + MS_LOG(ERROR) << "Inference error " << status; + std::cerr << "Inference error " << status << std::endl; + return RET_ERROR; + } + + auto ret = CompareOutput(); + if (ret == RET_TOO_BIG && !enforce_accuracy) { + MS_LOG(INFO) << "Accuracy Error is big but not enforced"; + std::cout << "Accuracy Error is big but not enforced" << std::endl; + return RET_OK; + } + + if (ret != RET_OK) { + MS_LOG(ERROR) << "Compare output error " << ret; + std::cerr << "Compare output error " << ret << std::endl; + return ret; + } + return RET_OK; +} + +int NetTrainCApi::CreateAndRunNetwork(const std::string &filename, const std::string &bb_filename, bool is_train, + int epochs, bool check_accuracy) { + auto start_prepare_time = GetTimeUs(); + + int ret = InitMSContext(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "InitContext failed, ret: " << ret; + return ret; + } + + InitTrainCfg(); + ms_model_ = OH_AI_ModelCreate(); + + if (is_train) { + ret = CreateAndRunNetworkForTrain(filename, bb_filename, context_ , train_cfg_, epochs); + if (ret != RET_OK) { + MS_LOG(ERROR) << "CreateAndRunNetworkForTrain failed."; + return RET_ERROR; + } + } else { + ret = CreateAndRunNetworkForInference(filename, context_); + if (ret != RET_OK) { + MS_LOG(ERROR) << "CreateAndRunNetworkForInference failed."; + return RET_ERROR; + } + } + 
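+  // At this point the model has been compiled, either as a training graph
+  // (OH_AI_TrainModelBuildFromFile) or as a plain inference graph (OH_AI_ModelBuildFromFile).
+  // The remaining steps query the input tensors, apply any inputShapes resize, load the input
+  // data, and then run MarkPerformance (when training epochs are requested) and/or MarkAccuracy
+  // (when an expectedDataFile is given).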
+ ms_inputs_for_api_ = OH_AI_ModelGetInputs(ms_model_); + if (ms_inputs_for_api_.handle_list == nullptr) { + MS_LOG(ERROR) << "OH_AI_ModelGetInputs failed, ret: "; + return RET_ERROR; + } + + if (!flags_->resize_dims_.empty()) { + std::vector shape_infos; + std::transform(flags_->resize_dims_.begin(), flags_->resize_dims_.end(), std::back_inserter(shape_infos), + [&](auto &shapes) { + OH_AI_ShapeInfo shape_info; + shape_info.shape_num = shapes.size(); + for (size_t i = 0; i < shape_info.shape_num; i++) { + shape_info.shape[i] = shapes[i]; + } + return shape_info; + }); + auto status = OH_AI_ModelResize(ms_model_, ms_inputs_for_api_, shape_infos.data(), shape_infos.size()); + if (status != OH_AI_STATUS_SUCCESS) { + MS_LOG(ERROR) << "Input tensor resize failed."; + std::cout << "Input tensor resize failed."; + return RET_ERROR; + } + } + + auto end_prepare_time = GetTimeUs(); + MS_LOG(INFO) << "PrepareTime = " << ((end_prepare_time - start_prepare_time) / kTHOUSAND) << " ms"; + std::cout << "PrepareTime = " << ((end_prepare_time - start_prepare_time) / kTHOUSAND) << " ms" << std::endl; + // Load input + MS_LOG(INFO) << "Load input data"; + auto status = LoadInput(); + if (status != RET_OK) { + MS_LOG(ERROR) << "Load input data error"; + std::cout << "Load input data error" << std::endl; + return status; + } + + if ((epochs > 0) && is_train) { + status = MarkPerformance(); + if (status != RET_OK) { + MS_LOG(ERROR) << "Run MarkPerformance error: " << status; + std::cout << "Run MarkPerformance error: " << status << std::endl; + return status; + } + SaveModels(); // save file if flags are on + } + if (!flags_->data_file_.empty()) { + auto res = OH_AI_ModelSetTrainMode(ms_model_, false); + if (res != OH_AI_STATUS_SUCCESS) { + MS_LOG(ERROR) << "set eval mode failed. 
"; + return RET_ERROR; + } + + status = MarkAccuracy(check_accuracy); + if (status != RET_OK) { + MS_LOG(ERROR) << "Run MarkAccuracy error: " << status; + std::cout << "Run MarkAccuracy error: " << status << std::endl; + return status; + } + } + return RET_OK; +} + +int NetTrainCApi::PrintInputData() { + constexpr int64_t kPrintDataNum = 20; + for (size_t i = 0; i < ms_inputs_for_api_.handle_num; i++) { + auto input = ms_inputs_for_api_.handle_list[i]; + std::cout << "InData" << i << ": "; + auto data_type = static_cast(OH_AI_TensorGetDataType(input)); + if (data_type == TypeId::kObjectTypeString) { + MS_LOG(ERROR) << "Unsupported OH_AI_DATATYPE_OBJECTTYPE_STRING."; + return RET_ERROR; + } + auto tensor_data = OH_AI_TensorGetData(input); + size_t print_num = std::min(OH_AI_TensorGetElementNum(input), kPrintDataNum); + for (size_t j = 0; j < print_num; j++) { + if (data_type == TypeId::kNumberTypeFloat32 || data_type == TypeId::kNumberTypeFloat) { + std::cout << static_cast(tensor_data)[j] << " "; + } else if (data_type == TypeId::kNumberTypeInt8) { + std::cout << static_cast(tensor_data)[j] << " "; + } else if (data_type == TypeId::kNumberTypeUInt8) { + std::cout << static_cast(tensor_data)[j] << " "; + } else if (data_type == TypeId::kNumberTypeInt32) { + std::cout << static_cast(tensor_data)[j] << " "; + } else if (data_type == TypeId::kNumberTypeInt64) { + std::cout << static_cast(tensor_data)[j] << " "; + } else if (data_type == TypeId::kNumberTypeBool) { + std::cout << static_cast(tensor_data)[j] << " "; + } else { + MS_LOG(ERROR) << "Datatype: " << data_type << " is not supported."; + return RET_ERROR; + } + } + std::cout << std::endl; + } + return RET_OK; +} + +int NetTrainCApi::PrintResult(const std::vector &title, + const std::map> &result) { + std::vector columnLenMax(kFieldsToPrint); + std::vector> rows; + + for (auto &iter : result) { + std::string stringBuf[kFieldsToPrint]; + std::vector columns; + size_t len = 0; + int index = 0; + len = iter.first.size(); + if (len > columnLenMax.at(index)) { + columnLenMax.at(index) = len + kPrintOffset; + } + columns.push_back(iter.first); + + index++; + if (title[0] == "opName") { + stringBuf[index] = std::to_string(iter.second.second / flags_->epochs_); + } else { + stringBuf[index] = std::to_string(iter.second.second / iter.second.first); + } + len = stringBuf[index].length(); + if (len > columnLenMax.at(index)) { + columnLenMax.at(index) = len + kPrintOffset; + } + columns.emplace_back(stringBuf[index]); + + index++; + stringBuf[index] = std::to_string(iter.second.second / g_op_cost_total_); + len = stringBuf[index].length(); + if (len > columnLenMax.at(index)) { + columnLenMax.at(index) = len + kPrintOffset; + } + columns.emplace_back(stringBuf[index]); + + index++; + stringBuf[index] = std::to_string(iter.second.first); + len = stringBuf[index].length(); + if (len > columnLenMax.at(index)) { + columnLenMax.at(index) = len + kPrintOffset; + } + columns.emplace_back(stringBuf[index]); + + index++; + stringBuf[index] = std::to_string(iter.second.second); + len = stringBuf[index].length(); + if (len > columnLenMax.at(index)) { + columnLenMax.at(index) = len + kPrintOffset; + } + columns.emplace_back(stringBuf[index]); + + rows.push_back(columns); + } + + printf("-------------------------------------------------------------------------\n"); + for (int i = 0; i < kFieldsToPrint; i++) { + auto printBuf = title[i]; + if (printBuf.size() > columnLenMax.at(i)) { + columnLenMax.at(i) = printBuf.size(); + } + printBuf.resize(columnLenMax.at(i), 
' '); + printf("%s\t", printBuf.c_str()); + } + printf("\n"); + for (auto &row : rows) { + for (int j = 0; j < kFieldsToPrint; j++) { + auto printBuf = row[j]; + printBuf.resize(columnLenMax.at(j), ' '); + printf("%s\t", printBuf.c_str()); + } + printf("\n"); + } + return RET_OK; +} + +bool TimeProfilingBeforeCallback(const OH_AI_TensorHandleArray inputs, const OH_AI_TensorHandleArray outputs, + const OH_AI_CallBackParam kernel_Info) { + if (g_c_op_times_by_type_.find(kernel_Info.node_type) == g_c_op_times_by_type_.end()) { + g_c_op_times_by_type_.insert(std::make_pair(kernel_Info.node_type, std::make_pair(0, 0.0f))); + } + if (g_c_op_times_by_name_.find(kernel_Info.node_name) == g_c_op_times_by_name_.end()) { + g_c_op_times_by_name_.insert(std::make_pair(kernel_Info.node_name, std::make_pair(0, 0.0f))); + } + + g_op_call_times_total_++; + g_op_begin_ = mindspore::lite::GetTimeUs(); + return true; +} + +bool TimeProfilingAfterCallback(const OH_AI_TensorHandleArray inputs, const OH_AI_TensorHandleArray outputs, + const OH_AI_CallBackParam kernel_Info) { + uint64_t opEnd = mindspore::lite::GetTimeUs(); + float cost = static_cast(opEnd - g_op_begin_) / 1000.0f; + g_op_cost_total_ += cost; + g_c_op_times_by_type_[kernel_Info.node_type].first++; + g_c_op_times_by_type_[kernel_Info.node_type].second += cost; + g_c_op_times_by_name_[kernel_Info.node_name].first++; + g_c_op_times_by_name_[kernel_Info.node_name].second += cost; + return true; +} +} // namespace lite +} // namespace mindspore + + diff --git a/mindspore/lite/tools/benchmark_train/net_train_c_api.h b/mindspore/lite/tools/benchmark_train/net_train_c_api.h new file mode 100644 index 00000000..bb84d3c1 --- /dev/null +++ b/mindspore/lite/tools/benchmark_train/net_train_c_api.h @@ -0,0 +1,121 @@ +/** + * Copyright 2023-2023 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_TOOLS_BENCHMARK_NET_TRAIN_C_API_H +#define MINDSPORE_LITE_TOOLS_BENCHMARK_NET_TRAIN_C_API_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "include/api/model.h" +#include "include/api/types.h" +#include "include/api/context.h" +#include "include/api/cfg.h" + +#include "include/c_api/model_c.h" +#include "include/c_api/context_c.h" + +#ifdef ENABLE_FP16 +#include +#endif +#include "tools/common/flag_parser.h" +#include "src/common/file_utils.h" +#include "src/common/utils.h" +#include "tools/benchmark_train/net_train_base.h" + +namespace mindspore::lite { + namespace { + std::map> g_c_op_times_by_type_; + std::map> g_c_op_times_by_name_; + } +#ifdef __cplusplus + extern "C" { +#endif + bool TimeProfilingBeforeCallback(const OH_AI_TensorHandleArray inputs, const OH_AI_TensorHandleArray outputs, + const OH_AI_CallBackParam kernel_Info); + bool TimeProfilingAfterCallback(const OH_AI_TensorHandleArray inputs, const OH_AI_TensorHandleArray outputs, + const OH_AI_CallBackParam kernel_Info); +#ifdef __cplusplus + } +#endif + +class MS_API NetTrainCApi : public NetTrainBase { + public: + explicit NetTrainCApi(NetTrainFlags *flags) : NetTrainBase(flags) {} + virtual ~NetTrainCApi() {}; + + protected: + // call GenerateRandomData to fill inputTensors + int GenerateInputData() override; + + int ReadInputFile() override; + + int LoadStepInput(size_t step); + + int InitMSContext(); + + void InitTrainCfg(); + + char **TransStrVectorToCharArrays(const std::vector &s); + + std::vector TransCharArraysToStrVector(char **c, const size_t &num); + + int CreateAndRunNetwork(const std::string &filename, const std::string &bb_filename, bool is_train, int epochs, + bool check_accuracy = true) override; + + int CreateAndRunNetworkForInference(const std::string &filename, const OH_AI_ContextHandle &context); + + int CreateAndRunNetworkForTrain(const std::string &filename, const std::string &bb_filename, + const OH_AI_ContextHandle &context, + const OH_AI_TrainCfgHandle &train_cfg, int epochs); + + int InitDumpTensorDataCallbackParameter() override; + + int InitTimeProfilingCallbackParameter() override; + + int PrintResult(const std::vector &title, const std::map> &result) override; + + int PrintInputData(); + + int MarkPerformance() override; + + int MarkAccuracy(bool enforce_accuracy = true) override; + + int CompareOutput() override; + + int SaveModels() override; + + OH_AI_ModelHandle ms_model_; + OH_AI_TensorHandleArray ms_inputs_for_api_; + OH_AI_ContextHandle context_ = nullptr; + OH_AI_TrainCfgHandle train_cfg_ = nullptr; + OH_AI_KernelCallBack before_call_back_{nullptr}; + OH_AI_KernelCallBack after_call_back_{nullptr}; +}; +} // namespace mindspore::lite + +#endif //MINDSPORE_LITE_TOOLS_BENCHMARK_NET_TRAIN_C_API_H diff --git a/mindspore/lite/tools/benchmark_train/run_net_train.cc b/mindspore/lite/tools/benchmark_train/run_net_train.cc new file mode 100644 index 00000000..37a7e602 --- /dev/null +++ b/mindspore/lite/tools/benchmark_train/run_net_train.cc @@ -0,0 +1,86 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "tools/benchmark_train/run_net_train.h" +#include "tools/benchmark_train/net_train.h" +#include "tools/benchmark_train/net_train_c_api.h" + +namespace mindspore { +namespace lite { +int RunNetTrain(int argc, const char **argv) { + NetTrainFlags flags; + Option err = flags.ParseFlags(argc, argv); + + if (err.IsSome()) { + std::cerr << err.Get() << std::endl; + std::cerr << flags.Usage() << std::endl; + return RET_ERROR; + } + + if (flags.help) { + std::cerr << flags.Usage() << std::endl; + return RET_OK; + } + if (flags.unified_api_) { + return NetTrain::RunNr(&flags); + } + + auto api_type = std::getenv("MSLITE_API_TYPE"); + if (api_type != nullptr) { + MS_LOG(INFO) << "MSLITE_API_TYPE = " << api_type; + std::cout << "MSLITE_API_TYPE = " << api_type << std::endl; + } + + NetTrainBase *net_trainer = nullptr; + if (api_type == nullptr || std::string(api_type) == "NEW") { + net_trainer = new (std::nothrow) NetTrain(&flags); + } else if (std::string(api_type) == "C") { + net_trainer = new (std::nothrow) NetTrainCApi(&flags); + } else { + MS_LOG(ERROR) << "Invalid MSLITE_API_TYPE, (NEW/C, default:NEW)"; + return RET_ERROR; + } + + if (net_trainer == nullptr) { + MS_LOG(ERROR) << "new net_trainer failed."; + return RET_ERROR; + } + auto status = net_trainer->Init(); + if (status != RET_OK) { + MS_LOG(ERROR) << "NetTrain init Error : " << status; + std::cerr << "NetTrain init Error : " << status << std::endl; + return RET_ERROR; + } + + status = net_trainer->RunNetTrain(); + if (status != RET_OK) { + MS_LOG(ERROR) << "Run NetTrain " + << flags.model_file_.substr(flags.model_file_.find_last_of("/") + 1).c_str() + << " Failed : " << status; + std::cerr << "Run NetTrain " << flags.model_file_.substr(flags.model_file_.find_last_of("/") + 1).c_str() + << " Failed : " << status << std::endl; + return RET_ERROR; + } + + MS_LOG(INFO) << "Run NetTrain " << flags.model_file_.substr(flags.model_file_.find_last_of("/") + 1).c_str() + << " Success."; + std::cout << "Run NetTrain " << flags.model_file_.substr(flags.model_file_.find_last_of("/") + 1).c_str() + << " Success." << std::endl; + delete net_trainer; + return RET_OK; +} +} // namespace lite +} // namespace mindspore \ No newline at end of file diff --git a/mindspore/lite/tools/benchmark_train/run_net_train.h b/mindspore/lite/tools/benchmark_train/run_net_train.h new file mode 100644 index 00000000..9ca2d73c --- /dev/null +++ b/mindspore/lite/tools/benchmark_train/run_net_train.h @@ -0,0 +1,22 @@ +/** + * Copyright 2023-2023 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_TOOLS_BENCHMARK_RUN_NET_TRAIN_H +#define MINDSPORE_LITE_TOOLS_BENCHMARK_RUN_NET_TRAIN_H +namespace mindspore::lite { +int RunNetTrain(int argc, const char **argv); +} // namespace mindspore::lite +#endif // MINDSPORE_LITE_TOOLS_BENCHMARK_RUN_NET_TRAIN_H diff --git a/mindspore/lite/tools/converter/CMakeLists.txt b/mindspore/lite/tools/converter/CMakeLists.txt index 1e09d2ed..f854620f 100644 --- a/mindspore/lite/tools/converter/CMakeLists.txt +++ b/mindspore/lite/tools/converter/CMakeLists.txt @@ -7,6 +7,8 @@ endif() set(SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../../src) +include_directories(${CMAKE_SOURCE_DIR}/mindspore/lite/) + if(ENABLE_GPU) add_compile_definitions(ENABLE_GPU) endif() @@ -70,6 +72,7 @@ add_subdirectory(parser/caffe) add_subdirectory(parser/tflite) add_subdirectory(parser/onnx) add_subdirectory(parser/tf) +add_subdirectory(parser/third_party) if(ENABLE_CONVERT_PYTORCH_MODEL) add_subdirectory(parser/pytorch) endif() @@ -363,6 +366,7 @@ target_link_libraries(mindspore_converter tf_parser_mid caffe_parser_mid onnx_parser_mid + third_party_parser_mid lite_exporter_mid graph_pass_mid fusion_mid diff --git a/mindspore/lite/tools/converter/config_parser/config_file_parser.cc b/mindspore/lite/tools/converter/config_parser/config_file_parser.cc index fecc56d9..2e7ca749 100644 --- a/mindspore/lite/tools/converter/config_parser/config_file_parser.cc +++ b/mindspore/lite/tools/converter/config_parser/config_file_parser.cc @@ -34,6 +34,7 @@ constexpr auto kMixedBitWeightQuantParam = "mixed_bit_weight_quant_param"; constexpr auto kDataPreprocessParam = "data_preprocess_param"; constexpr auto kRegistry = "registry"; constexpr auto kMicroParam = "micro_param"; +constexpr auto kThirdPartyModelParam = "third_party_model"; constexpr auto kCpuOptionParam = "cpu_option_cfg_param"; constexpr auto kCustomOppPath = "custom_opp_path"; constexpr auto kTransformQuantParam = "transform_quant_param"; @@ -330,6 +331,12 @@ int ConfigFileParser::ParseConfigParam(std::maperase(kThirdPartyModelParam); + if (ret != RET_OK) { + MS_LOG(ERROR) << "ParseTransformQuantString failed."; + return ret; + } ret = ParseWeightQuantString(*maps); (void)maps->erase(kWeightQuantParam); if (ret != RET_OK) { @@ -594,5 +601,25 @@ int ConfigFileParser::ParseGraphKernelString(const std::map> §ions) { + if (sections.find(kThirdPartyModelParam) == sections.end()) { + return RET_OK; + } + const auto &input_args = sections.at(kThirdPartyModelParam); + const std::map kValidArgs = { + {"input_shapes", third_party_model_string_.input_shapes}, + {"input_dtypes", third_party_model_string_.input_dtypes}, + {"input_names", third_party_model_string_.input_names}, + {"input_formats", third_party_model_string_.input_formats}, + {"output_shapes", third_party_model_string_.output_shapes}, + {"output_dtypes", third_party_model_string_.output_dtypes}, + {"output_names", third_party_model_string_.output_names}, + {"output_formats", third_party_model_string_.output_formats}, + {"extended_parameters", third_party_model_string_.extended_parameters}, + }; + return SetMapData(input_args, kValidArgs, kThirdPartyModelParam); +} } // namespace lite } // namespace mindspore diff --git a/mindspore/lite/tools/converter/config_parser/config_file_parser.h b/mindspore/lite/tools/converter/config_parser/config_file_parser.h index 31269816..6997bac8 100644 --- a/mindspore/lite/tools/converter/config_parser/config_file_parser.h +++ b/mindspore/lite/tools/converter/config_parser/config_file_parser.h @@ -110,6 +110,18 @@ struct 
MicroParamString { std::string changeable_weights_name; }; +struct ThirdPartyModelString { + std::string input_dtypes; + std::string input_shapes; + std::string input_names; // optional, default: "" + std::string input_formats; // optional, default: NHWC + std::string output_dtypes; + std::string output_shapes; + std::string output_names; // optional, default: "" + std::string output_formats; // optional, default: NHWC + std::string extended_parameters; // format: {key1:value1;ker2:value2} +}; + struct CpuOptionCfgString { std::string architecture; std::string instruction; @@ -144,6 +156,7 @@ class ConfigFileParser { RegistryInfoString GetRegistryInfoString() const { return this->registry_info_string_; } AclOptionCfgString GetAclOptionCfgString() { return this->acl_option_cfg_string_; } MicroParamString GetMicroParamString() { return this->micro_param_string_; } + lite::ThirdPartyModelString GetThirdPartyModelString() const { return this->third_party_model_string_; } CpuOptionCfgString GetCpuOptionCfgString() { return this->cpu_option_cfg_string_; } TransformQuantString GetTransformQuantString() const { return this->transform_quant_string_; } AscendQuantString GetAscendQuantString() const { return this->ascend_quant_string_; } @@ -161,6 +174,7 @@ class ConfigFileParser { int SetMapData(const std::map &input_map, const std::map &parse_map, const std::string §ion); int ParseMicroParamString(const std::map> &maps); + int ParseThirdPartyParamString(const std::map> §ions); int ParseCpuOptionCfgString(const std::map> &maps); int ParseTransformQuantString(const std::map> &maps); int ParseAscendQuantString(const std::map> &maps); @@ -176,6 +190,7 @@ class ConfigFileParser { RegistryInfoString registry_info_string_; AclOptionCfgString acl_option_cfg_string_; MicroParamString micro_param_string_; + lite::ThirdPartyModelString third_party_model_string_; CpuOptionCfgString cpu_option_cfg_string_; TransformQuantString transform_quant_string_; AscendQuantString ascend_quant_string_; diff --git a/mindspore/lite/tools/converter/config_parser/third_party_param_parser.cc b/mindspore/lite/tools/converter/config_parser/third_party_param_parser.cc new file mode 100644 index 00000000..aee6a29c --- /dev/null +++ b/mindspore/lite/tools/converter/config_parser/third_party_param_parser.cc @@ -0,0 +1,299 @@ +/** + * Copyright 2023 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "tools/converter/config_parser/third_party_param_parser.h" +#include +#include +#include +#include "include/errorcode.h" +#include "src/common/log_adapter.h" +#include "nnacl/op_base.h" +#include "tools/common/string_util.h" + +namespace mindspore { +namespace lite { +namespace { +const std::map kDataTypeMap = { + {"float64", TypeId::kNumberTypeFloat64}, {"float32", TypeId::kNumberTypeFloat32}, + {"float16", TypeId::kNumberTypeFloat16}, {"int64", TypeId::kNumberTypeInt64}, + {"int32", TypeId::kNumberTypeInt32}, {"int16", TypeId::kNumberTypeInt16}, + {"int8", TypeId::kNumberTypeInt8}, {"uint8", TypeId::kNumberTypeUInt8}, + {"bool", TypeId::kNumberTypeBool}, +}; + +TypeId ConvertDataType(const std::string &type) { + auto iter = kDataTypeMap.find(type); + if (iter == kDataTypeMap.end()) { + return TypeId::kTypeUnknown; + } + return iter->second; +} +} // namespace + +/** + * Parse shapes like "1,256,256,3;3,96;96,96", and return like [[1,256,256,3], [3,96], [96,96]]. + */ +int ThirdPartyParamParser::DoParseShape(const std::string &src, std::vector> *dst_shapes) { + MS_CHECK_TRUE_RET(dst_shapes != nullptr, RET_ERROR); + dst_shapes->clear(); + + auto tmp_shapes = SplitStringToVector(src, ";"); + for (auto tmp_shape : tmp_shapes) { + auto tmp = SplitStringToVector(tmp_shape, ","); + std::vector shape = {}; + for (auto t : tmp) { + int value = 0; + if (!ConvertIntNum(t, &value)) { + MS_LOG(ERROR) << "Found error when convert shape string to integer"; + return RET_ERROR; + } + if (value <= 0) { // Valid shape value should be greater than 0. + MS_LOG(ERROR) << "Only support fixed shapes in third party param"; + return RET_ERROR; + } + shape.push_back(value); + } + dst_shapes->push_back(shape); + } + return RET_OK; +} + +/** + * Parse extended parameter like "key_1:value_1;key_2:value_2" and get {{"key_1", "value_1"}, {"key_2", "value_2"}}. + */ +int ThirdPartyParamParser::DoParseExtendedParameters(const std::string &src, + std::map> *dst_ext_param) { + MS_CHECK_TRUE_RET(dst_ext_param != nullptr, RET_ERROR); + constexpr size_t kKeyIndex = 0U; + constexpr size_t kValueIndex = 1U; + constexpr size_t kKeyValueSize = 2U; + + if (src == "") { // Just return if 'extended_parameters' is configured. 
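+    // An empty string means the optional extended_parameters entry was omitted from the
+    // [third_party_model] section, so there is nothing to parse.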
+ return RET_OK; + } + + auto tmp_list = SplitStringToVector(src, ";"); + std::map> tmp_map = {}; + for (auto tmp : tmp_list) { + auto key_and_value = SplitStringToVector(tmp, ":"); + if (key_and_value.size() != kKeyValueSize) { + MS_LOG(ERROR) << "Parse extended parameters failed, should keep key:value format"; + return RET_ERROR; + } + auto key = key_and_value[kKeyIndex]; + auto value = key_and_value[kValueIndex]; + if (tmp_map.find(key) != tmp_map.end()) { + MS_LOG(ERROR) << "Parse extended parameters failed, key should not be duplicated"; + return RET_ERROR; + } + tmp_map.emplace(key, std::vector(value.begin(), value.end())); + } + + *dst_ext_param = tmp_map; + return RET_OK; +} + +/** + * Parse dtypes like "float32;float32;int32" and return [kNumberTypeFloat32, kNumberTypeFloat32, kNumberTypeInt32] + */ +int ThirdPartyParamParser::DoParseDtypes(const std::string &src, std::vector *dst_dtypes) { + MS_CHECK_TRUE_RET(dst_dtypes != nullptr, RET_ERROR); + dst_dtypes->clear(); + auto tmp_dtypes = SplitStringToVector(src, ";"); + for (auto tmp_dtype : tmp_dtypes) { + TypeId type = ConvertDataType(tmp_dtype); + if (type == kTypeUnknown) { + MS_LOG(ERROR) << "Parse dtypes in third party model config failed"; + return RET_ERROR; + } + dst_dtypes->push_back(type); + } + return RET_OK; +} + +/** + * Parse names like "foo;bar;boo" and get ["foo", "bar", "boo"] + * If input names are not provided in config, use the default prefix to generate like: "in_0;in_1;..;in_n" + */ +int ThirdPartyParamParser::DoParseNames(const std::string &src, size_t num, const std::string &default_prefix, + std::vector *dst_names) { + MS_CHECK_TRUE_RET(dst_names != nullptr, RET_ERROR); + std::string tmp_names = src; + if (tmp_names.empty()) { + std::string tmp = ""; + for (size_t i = 0; i < num; i++) { + tmp += default_prefix + "_" + std::to_string(i); + if (i + 1 < num) { + tmp += ";"; + } + } + tmp_names = tmp; + } + + *dst_names = SplitStringToVector(tmp_names, ";"); + if (dst_names->size() != num) { + MS_LOG(ERROR) << "Name number " << dst_names->size() << " and input number: " << num << " are not equal"; + return RET_ERROR; + } + return RET_OK; +} + +/** + * Parse formats like "NCHW;NHWC" and get [NCHW, NHWC] + */ +namespace { + int StringToFormat(const std::string &format_string, schema::Format *format) { + static const std::unordered_map kFormatTable = { + {"NCHW", schema::Format::Format_NCHW}, + {"NHWC", schema::Format::Format_NHWC}, + {"NHWC4", schema::Format::Format_NHWC4}, + {"HWKC", schema::Format::Format_HWKC}, + {"HWCK", schema::Format::Format_HWCK}, + {"KCHW", schema::Format::Format_KCHW}, + {"CKHW", schema::Format::Format_CKHW}, + {"KHWC", schema::Format::Format_KHWC}, + {"CHWK", schema::Format::Format_CHWK}, + {"HW", schema::Format::Format_HW}, + {"HW4", schema::Format::Format_HW4}, + {"NC", schema::Format::Format_NC}, + {"NC4", schema::Format::Format_NC4}, + {"NC4HW4", schema::Format::Format_NC4HW4}, + {"NUM_OF_FORMAT", schema::Format::Format_NUM_OF_FORMAT}, + {"NCDHW", schema::Format::Format_NCDHW}, + {"NWC", schema::Format::Format_NWC}, + {"NCW", schema::Format::Format_NCW}, + }; + + if (format == nullptr) { + return RET_NULL_PTR; + } + + auto iter = kFormatTable.find(format_string); + if (iter == kFormatTable.end()) { + return RET_PARAM_INVALID; + } + + *format = iter->second; + return RET_OK; + } +} + +int ThirdPartyParamParser::DoParseFormats(const std::string &src, size_t num, + std::vector *result_formats) { + MS_CHECK_TRUE_RET(result_formats != nullptr, RET_ERROR); + std::string tmp_names = src; 
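+  // When no formats are configured, every tensor defaults to NHWC; otherwise the ';'-separated
+  // list must supply exactly one recognised schema::Format name per tensor.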
+ if (tmp_names.empty()) { + std::vector<schema::Format> default_formats(num, schema::Format::Format_NHWC); + *result_formats = default_formats; + return RET_OK; + } + + auto format_strings = SplitStringToVector(tmp_names, ";"); + if (format_strings.size() != num) { + MS_LOG(ERROR) << "Number of formats: " << format_strings.size() << " and number of tensors: " << num << " are not equal"; + return RET_ERROR; + } + + std::vector<schema::Format> result(num); + for (size_t i = 0; i < num; i++) { + if (StringToFormat(format_strings[i], &result[i]) != RET_OK) { + MS_LOG(ERROR) << "Tensor format:" << format_strings[i] << " is invalid"; + return RET_PARAM_INVALID; + } + } + *result_formats = result; + return RET_OK; +} + +int ThirdPartyParamParser::Parse(const ThirdPartyModelString &param_string, ThirdPartyModelParam *param) { + MS_CHECK_TRUE_RET(param != nullptr, RET_ERROR); + + auto ret = DoParseShape(param_string.input_shapes, &(param->input_shapes)); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Parse input shapes of third party param failed"; + return RET_ERROR; + } + + ret = DoParseDtypes(param_string.input_dtypes, &(param->input_dtypes)); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Parse input dtypes of third party param failed"; + return RET_ERROR; + } + + auto input_shape_num = param->input_shapes.size(); + auto input_dtype_num = param->input_dtypes.size(); + if (input_shape_num != input_dtype_num) { + MS_LOG(ERROR) << "Input shape number: " << input_shape_num << " and dtype number: " << input_dtype_num + << " are not equal"; + return RET_ERROR; + } + + ret = DoParseFormats(param_string.input_formats, input_shape_num, &(param->input_formats)); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Parse input formats of third party param failed"; + return RET_ERROR; + } + + const std::string kInputNamePrefix = "in"; + ret = DoParseNames(param_string.input_names, input_shape_num, kInputNamePrefix, &(param->input_names)); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Parse input names of third party param failed"; + return RET_ERROR; + } + + ret = DoParseShape(param_string.output_shapes, &(param->output_shapes)); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Parse output shapes of third party param failed"; + return RET_ERROR; + } + + ret = DoParseDtypes(param_string.output_dtypes, &(param->output_dtypes)); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Parse output dtypes of third party param failed"; + return RET_ERROR; + } + + auto output_shape_num = param->output_shapes.size(); + auto output_dtype_num = param->output_dtypes.size(); + if (output_shape_num != output_dtype_num) { + MS_LOG(ERROR) << "Output shape number: " << output_shape_num << " and dtype number: " << output_dtype_num + << " are not equal"; + return RET_ERROR; + } + + ret = DoParseFormats(param_string.output_formats, output_shape_num, &(param->output_formats)); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Parse output formats of third party param failed"; + return RET_ERROR; + } + + const std::string kOutputNamePrefix = "out"; + ret = DoParseNames(param_string.output_names, output_shape_num, kOutputNamePrefix, &(param->output_names)); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Parse output names of third party param failed"; + return RET_ERROR; + } + + ret = DoParseExtendedParameters(param_string.extended_parameters, &(param->extended_parameters)); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Parse extended parameters of third party param failed"; + return RET_ERROR; + } + + return RET_OK; +} +} // namespace lite +} // namespace mindspore diff --git
a/mindspore/lite/tools/converter/config_parser/third_party_param_parser.h b/mindspore/lite/tools/converter/config_parser/third_party_param_parser.h new file mode 100644 index 00000000..5cf6e8fb --- /dev/null +++ b/mindspore/lite/tools/converter/config_parser/third_party_param_parser.h @@ -0,0 +1,44 @@ +/** + * Copyright 2023 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_LITE_TOOLS_CONVERTER_CONFIG_PARSER_THIRD_PARTY_PARAM_PARSER_H_ +#define MINDSPORE_LITE_TOOLS_CONVERTER_CONFIG_PARSER_THIRD_PARTY_PARAM_PARSER_H_ +#include <string> +#include <vector> +#include <map> +#include "include/errorcode.h" +#include "tools/converter/cxx_api/converter_para.h" +#include "tools/converter/config_parser/config_file_parser.h" + +namespace mindspore { +namespace lite { +class ThirdPartyParamParser { + public: + static int Parse(const lite::ThirdPartyModelString &param_string, ThirdPartyModelParam *param); + + private: + static int DoParseShape(const std::string &src, std::vector<std::vector<int64_t>> *dst_shapes); + static int DoParseExtendedParameters(const std::string &src, + std::map<std::string, std::vector<uint8_t>> *dst_ext_param); + static int DoParseDtypes(const std::string &src, std::vector<TypeId> *dst_dtypes); + static int DoParseNames(const std::string &src, size_t num, const std::string &default_prefix, + std::vector<std::string> *dst_names); + static int DoParseFormats(const std::string &src, size_t num, std::vector<schema::Format> *result_formats); +}; +} // namespace lite +} // namespace mindspore + +#endif // MINDSPORE_LITE_TOOLS_CONVERTER_CONFIG_PARSER_THIRD_PARTY_PARAM_PARSER_H_ diff --git a/mindspore/lite/tools/converter/converter.cc b/mindspore/lite/tools/converter/converter.cc index df3176c2..a61bd51c 100644 --- a/mindspore/lite/tools/converter/converter.cc +++ b/mindspore/lite/tools/converter/converter.cc @@ -49,6 +49,7 @@ #include "tools/converter/config_parser/preprocess_parser.h" #include "tools/converter/config_parser/quant_param_parser.h" #include "tools/converter/config_parser/graph_kernel_param_parser.h" +#include "tools/converter/config_parser/third_party_param_parser.h" #include "tools/converter/converter_funcgraph.h" #include "tools/converter/converter_metagraph.h" #include "tools/common/string_util.h" @@ -472,6 +473,12 @@ int ConverterImpl::ParseParam(lite::ConfigFileParser *config_parser, const std:: MS_LOG(ERROR) << "Parse mixed bit weight quant param failed."; return ret; } + ret = lite::ThirdPartyParamParser::Parse(config_parser->GetThirdPartyModelString(), + &param->thirdPartyModelParam); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Parse third party param failed."; + return ret; + } ret = InitExtendedIntegrationInfo(param, *config_parser); if (ret != RET_OK) { MS_LOG(ERROR) << "Parse extended integration info failed."; @@ -699,19 +706,20 @@ std::string ConverterImpl::GetStrFromConfigFile(const std::string &file, const s int CheckFmkType(const std::shared_ptr<ConverterPara> &param) { if (param != nullptr) { - std::set<FmkType> valid_values = {FmkType::kFmkTypeTf, FmkType::kFmkTypeCaffe, FmkType::kFmkTypeOnnx, - FmkType::kFmkTypeMs,
FmkType::kFmkTypeTflite, FmkType::kFmkTypePytorch, - FmkType::kFmkTypeMsLite}; - if (std::find(valid_values.begin(), valid_values.end(), param->fmk_type) == valid_values.end()) { - MS_LOG(ERROR) << "INPUT ILLEGAL: fmk_type must be " - "kFmkTypeTf|kFmkTypeCaffe|kFmkTypeOnnx|kFmkTypeMs|kFmkTypeTflite|kFmkTypeMsLite" - << ", but got " << param->fmk_type; - return RET_INPUT_PARAM_INVALID; - } - if (param->fmk_type != converter::kFmkTypeCaffe && !param->weight_file.empty()) { - MS_LOG(ERROR) << "INPUT ILLEGAL: weight_file is not a valid flag"; - return RET_INPUT_PARAM_INVALID; - } + return RET_OK; + } + std::set<FmkType> kValidFmkTypes = {FmkType::kFmkTypeTf, FmkType::kFmkTypeCaffe, FmkType::kFmkTypeOnnx, + FmkType::kFmkTypeMs, FmkType::kFmkTypeTflite, FmkType::kFmkTypePytorch, + FmkType::kFmkTypeMsLite, FmkType::kFmkTypeThirdParty}; + if (kValidFmkTypes.find(param->fmk_type) == kValidFmkTypes.end()) { + MS_LOG(ERROR) << "INPUT ILLEGAL: fmk_type must be " + "TF|CAFFE|ONNX|MS|TFLITE|PYTORCH|MSLITE|THIRDPARTY" + << ", but got " << param->fmk_type; + return RET_INPUT_PARAM_INVALID; + } + if (param->fmk_type != converter::kFmkTypeCaffe && !param->weight_file.empty()) { + MS_LOG(ERROR) << "INPUT ILLEGAL: weight_file is not a valid flag"; + return RET_INPUT_PARAM_INVALID; } return RET_OK; } diff --git a/mindspore/lite/tools/converter/converter_funcgraph.cc b/mindspore/lite/tools/converter/converter_funcgraph.cc index f03f995c..61d5c463 100644 --- a/mindspore/lite/tools/converter/converter_funcgraph.cc +++ b/mindspore/lite/tools/converter/converter_funcgraph.cc @@ -90,6 +90,7 @@ FuncGraphPtr ConverterFuncGraph::Load3rdModelToFuncgraph(const std::shared_ptr<ConverterPara> &param) { converter_parameters.save_type = param->save_type; converter_parameters.model_file = param->model_file; converter_parameters.weight_file = param->weight_file; + converter_parameters.attrs.emplace("config_file", param->config_file); func_graph_base = model_parser->Parse(converter_parameters); if (func_graph_base == nullptr) { delete model_parser; @@ -447,11 +448,13 @@ STATUS ConverterFuncGraph::Optimize(const std::shared_ptr<ConverterPara> &param, return status; } - AnfTransform funcgraph_transform; - status = funcgraph_transform.Transform(func_graph, param); - if (status != RET_OK) { - MS_LOG(ERROR) << "Transform anf graph failed."; - return status; + if (param->fmk_type != converter::FmkType::kFmkTypeThirdParty) { + AnfTransform funcgraph_transform; + status = funcgraph_transform.Transform(func_graph, param); + if (status != RET_OK) { + MS_LOG(ERROR) << "Transform anf graph failed."; + return status; + } } status = UnifyFuncGraphOutputFormat(param, func_graph); diff --git a/mindspore/lite/tools/converter/converter_lite/converter_flags.cc b/mindspore/lite/tools/converter/converter_lite/converter_flags.cc index 4883c48d..024e209f 100644 --- a/mindspore/lite/tools/converter/converter_lite/converter_flags.cc +++ b/mindspore/lite/tools/converter/converter_lite/converter_flags.cc @@ -138,11 +138,11 @@ int Flags::InitFmk() { // value check not here, it is in converter c++ API's CheckValueParam method.
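+  // Illustrative example (flag names assumed from the existing converter_lite CLI, not introduced by this patch): a third-party model would be converted with something like
+  //   ./converter_lite --fmk=THIRDPARTY --modelFile=model.bin --outputFile=model --configFile=third_party.cfg
+  // where the config file supplies the tensor information consumed by ThirdPartyParamParser.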
std::map<std::string, FmkType> StrToEnumFmkTypeMap = { {"CAFFE", kFmkTypeCaffe}, {"MINDIR", kFmkTypeMs}, {"TFLITE", kFmkTypeTflite}, {"ONNX", kFmkTypeOnnx}, - {"TF", kFmkTypeTf}, {"PYTORCH", kFmkTypePytorch}, {"MSLITE", kFmkTypeMsLite}}; + {"TF", kFmkTypeTf}, {"PYTORCH", kFmkTypePytorch}, {"MSLITE", kFmkTypeMsLite}, {"THIRDPARTY", kFmkTypeThirdParty}}; if (StrToEnumFmkTypeMap.find(this->fmkIn) != StrToEnumFmkTypeMap.end()) { this->fmk = StrToEnumFmkTypeMap.at(this->fmkIn); } else { - std::cerr << "INPUT ILLEGAL: fmk must be TF|TFLITE|CAFFE|MINDIR|ONNX" << std::endl; + std::cerr << "INPUT ILLEGAL: fmk must be TF|TFLITE|CAFFE|MINDIR|ONNX|PYTORCH|MSLITE|THIRDPARTY" << std::endl; return RET_INPUT_PARAM_INVALID; } diff --git a/mindspore/lite/tools/converter/cxx_api/converter_para.h b/mindspore/lite/tools/converter/cxx_api/converter_para.h index a4f72a69..33210fd0 100644 --- a/mindspore/lite/tools/converter/cxx_api/converter_para.h +++ b/mindspore/lite/tools/converter/cxx_api/converter_para.h @@ -21,6 +21,7 @@ #include #include #include "include/converter.h" +#include "mindapi/base/type_id.h" #include "tools/converter/quantizer/quant_params.h" #include "tools/converter/preprocess/preprocess_param.h" #include "tools/converter/adapter/acl/common/acl_types.h" @@ -35,6 +36,18 @@ struct ParallelSplitConfig { std::vector<std::string> parallel_devices_; }; +struct ThirdPartyModelParam { + std::vector<TypeId> input_dtypes; + std::vector<std::vector<int64_t>> input_shapes; + std::vector<std::string> input_names; + std::vector<schema::Format> input_formats; + std::vector<TypeId> output_dtypes; + std::vector<std::vector<int64_t>> output_shapes; + std::vector<std::string> output_names; + std::vector<schema::Format> output_formats; + std::map<std::string, std::vector<uint8_t>> extended_parameters; +}; + struct CpuOptionCfg { std::string architecture; std::string instruction; @@ -97,6 +110,7 @@ struct ConverterPara { lite::acl::AclModelOptionCfg aclModelOptionCfgParam; lite::micro::MicroParam microParam; ParallelSplitConfig parallel_split_config; + ThirdPartyModelParam thirdPartyModelParam; AscendGeOptionCfg ascendGeOptionCfg; std::string device; std::string provider; diff --git a/mindspore/lite/tools/converter/graphdef_transform.cc b/mindspore/lite/tools/converter/graphdef_transform.cc index 90b744e5..bf1a82ae 100644 --- a/mindspore/lite/tools/converter/graphdef_transform.cc +++ b/mindspore/lite/tools/converter/graphdef_transform.cc @@ -76,11 +76,55 @@ int QuantTransform(const std::shared_ptr<ConverterPara> &param, schema::MetaGrap } return RET_OK; } + +int FillGraphOutputShape(MetaGraphT *meta_graph, const std::vector<std::vector<int64_t>> output_shapes) { + const auto &out_indices = meta_graph->outputIndex; + for (size_t i = 0; i < out_indices.size(); i++) { + auto &out_tensor = meta_graph->allTensors[out_indices[i]]; + out_tensor->dims = {}; + for (size_t k = 0; k < output_shapes[i].size(); k++) { + out_tensor->dims.push_back(static_cast<int32_t>(output_shapes[i][k])); + } + } + return RET_OK; +} + +void FillGraphInputAndOutputFormats(MetaGraphT *meta_graph, const ConverterPara &para) { + const auto &in_indices = meta_graph->inputIndex; + for (size_t i = 0; i < in_indices.size(); i++) { + auto &in_tensor = meta_graph->allTensors[in_indices[i]]; + in_tensor->format = para.thirdPartyModelParam.input_formats[i]; + MS_LOG_DEBUG << "input " << i << " format: " << EnumNameFormat(in_tensor->format); + } + + const auto &out_indices = meta_graph->outputIndex; + for (size_t i = 0; i < out_indices.size(); i++) { + auto &out_tensor = meta_graph->allTensors[out_indices[i]]; + out_tensor->format = para.thirdPartyModelParam.output_formats[i]; + MS_LOG_DEBUG << "output " << i << " format: " << EnumNameFormat(out_tensor->format); + } +} } // namespace int
GraphDefTransform::Transform(const std::shared_ptr<ConverterPara> &param) { MS_ASSERT(param != nullptr); STATUS status; + + if (param->fmk_type == converter::kFmkTypeThirdParty) { + + // The legacy optimizer infers shapes, but the Custom op that wraps the third-party model has no infer-shape function, + // so the legacy optimization passes are skipped for the kFmkTypeThirdParty case. + auto ret = FillGraphOutputShape(graph_defT_, param->thirdPartyModelParam.output_shapes); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Fill output shape of third party model failed, ret:" << ret; + return ret; + } + + // Tensors of the FuncGraph carry no format attribute, so the formats are set on the MetaGraph here. + FillGraphInputAndOutputFormats(graph_defT_, *param); + return RET_OK; + } + { auto old_nodes = GetGraphNodes(*graph_defT_); Optimizer unused_op_remove_optimizer; diff --git a/mindspore/lite/tools/converter/parser/third_party/CMakeLists.txt b/mindspore/lite/tools/converter/parser/third_party/CMakeLists.txt new file mode 100644 index 00000000..b55e0194 --- /dev/null +++ b/mindspore/lite/tools/converter/parser/third_party/CMakeLists.txt @@ -0,0 +1,4 @@ +add_library(third_party_parser_mid OBJECT third_party_model_parser.cc) +add_dependencies(third_party_parser_mid proto_mid) +add_dependencies(third_party_parser_mid fbs_src) +add_dependencies(third_party_parser_mid fbs_inner_src) \ No newline at end of file diff --git a/mindspore/lite/tools/converter/parser/third_party/third_party_model_parser.cc b/mindspore/lite/tools/converter/parser/third_party/third_party_model_parser.cc new file mode 100644 index 00000000..652db4af --- /dev/null +++ b/mindspore/lite/tools/converter/parser/third_party/third_party_model_parser.cc @@ -0,0 +1,277 @@ +/** + * Copyright 2023 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +#include "tools/converter/parser/third_party/third_party_model_parser.h" +#include <memory> +#include <string> +#include <vector> +#include "ir/value.h" +#include "mindapi/base/type_id.h" +#include "src/common/log_util.h" +#include "src/common/file_utils.h" +#include "nnacl/op_base.h" +#include "ops/primitive_c.h" +#include "ops/custom.h" +#include "ops/tuple_get_item.h" +#include "ops/make_tuple.h" +#include "ops/return.h" +#include "tools/converter/config_parser/config_file_parser.h" +#include "include/registry/model_parser_registry.h" +#include "tools/common/graph_util.h" +#include "tools/common/tensor_util.h" +#include "tools/converter/converter_context.h" +#include "tools/converter/parser/lite_model_parser_creator.h" + +using mindspore::converter::kFmkTypeThirdParty; + +namespace mindspore { +namespace lite { +api::FuncGraphPtr ThirdPartyModelParser::Parse(const converter::ConverterParameters &flag) { + model_file_ = flag.model_file; + auto &attrs = flag.attrs; + auto iter = attrs.find("config_file"); + if (iter == attrs.end()) { + return nullptr; + } + auto config_file = iter->second; + + auto ret = InitConfig(config_file); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Init config for third party model parsing failed"; + return nullptr; + } + + return CreateFuncGraph(); +} + +STATUS ThirdPartyModelParser::InitConfig(const std::string &config_file) { + lite::ConfigFileParser config_parser; + if (config_file.empty()) { + MS_LOG(ERROR) << "Missing config file in converting third party model"; + return RET_ERROR; + } + auto ret = config_parser.ParseConfigFile(config_file); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Get third party model section from config file failed"; + return RET_ERROR; + } + + ret = ThirdPartyParamParser::Parse(config_parser.GetThirdPartyModelString(), &param_); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Parse third party model param failed."; + return ret; + } + return RET_OK; +} + +api::FuncGraphPtr ThirdPartyModelParser::CreateFuncGraph() { + auto func_graph = std::make_shared<FuncGraph>(); + MS_CHECK_TRUE_RET(func_graph != nullptr, nullptr); + auto type_value = MakeValue(static_cast<int>(converter::kFmkTypeThirdParty)); + MS_CHECK_TRUE_RET(type_value != nullptr, nullptr); + func_graph->set_attr("fmk", type_value); + auto attr_value = MakeValue("third_party"); + MS_CHECK_TRUE_RET(attr_value != nullptr, nullptr); + func_graph->set_attr("graph_name", attr_value); + + std::vector<AnfNodePtr> input_nodes = {}; + auto ret = BuildGraphInputs(func_graph, &input_nodes); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Create func graph input nodes failed"; + return nullptr; + } + + CNodePtr custom_node = nullptr; + ret = BuildCustomOp(func_graph, input_nodes, &custom_node); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Create func graph custom op node failed"; + return nullptr; + } + + ret = BuildGraphOutputs(func_graph, custom_node); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Create func graph output nodes failed"; + return nullptr; + } + + static auto manager = Manage(func_graph); + func_graph->set_manager(manager); + + auto result_graph = api::MakeShared<api::FuncGraph>(func_graph); + return result_graph; +} + +STATUS ThirdPartyModelParser::BuildGraphInputs(const FuncGraphPtr &func_graph, std::vector<AnfNodePtr> *op_inputs) { + MS_ASSERT(op_inputs != nullptr && func_graph != nullptr); + auto &dtypes = param_.input_dtypes; + auto &shapes = param_.input_shapes; + auto &names = param_.input_names; + + auto input_size = dtypes.size(); + + // Create parameter nodes for graph inputs + for (size_t i = 0; i < input_size; i++) { + auto parameter =
func_graph->add_parameter(); + MSLITE_CHECK_PTR(parameter); + auto abstract_tensor = CreateTensorAbstract(shapes[i], dtypes[i]); + if (abstract_tensor == nullptr) { + MS_LOG(ERROR) << "Create tensor abstract failed"; + return RET_ERROR; + } + parameter->set_abstract(abstract_tensor); + parameter->set_name(names[i]); + op_inputs->push_back(parameter); + } + + // Create a parameter node for the const tensor that wraps the third-party model buffer. + size_t model_size = 0U; + auto model_data = ReadFile(model_file_.c_str(), &model_size); + std::vector<int64_t> model_shape = {static_cast<int64_t>(model_size)}; + auto tensor_info = CreateTensorInfo(nullptr, 0, model_shape, kNumberTypeUInt8); + if (tensor_info == nullptr) { + MS_LOG(ERROR) << "init tensor info failed"; + delete[] model_data; + return RET_NULL_PTR; + } + auto tensor_data = reinterpret_cast<uint8_t *>(tensor_info->data_c()); + if (memcpy_s(tensor_data, tensor_info->Size(), model_data, model_size) != EOK) { + MS_LOG(ERROR) << "memcpy failed."; + delete[] model_data; + return RET_ERROR; + } + delete[] model_data; + auto parameter = func_graph->add_parameter(); + MSLITE_CHECK_PTR(parameter); + auto status = InitParameterFromTensorInfo(parameter, tensor_info); + if (status != RET_OK) { + MS_LOG(ERROR) << "init parameter from tensor info failed."; + return RET_ERROR; + } + parameter->set_name("ThirdPartyModel"); + op_inputs->push_back(parameter); + return RET_OK; +} + +STATUS ThirdPartyModelParser::BuildCustomOp(const FuncGraphPtr &func_graph, const std::vector<AnfNodePtr> &op_inputs, + CNodePtr *operator_node) { + MS_ASSERT(operator_node != nullptr && func_graph != nullptr); + NotSupportOp::GetInstance()->set_fmk_type("THIRDPARTY"); + STATUS status = RET_OK; + + // create primitive and build CNode of CUSTOM operator + ops::PrimitiveCPtr primitive_c; + auto prim = std::make_unique<ops::Custom>(); + MS_CHECK_TRUE_RET(prim != nullptr, RET_ERROR); + prim->set_type("ThirdPartyModel"); + + const auto &attr = param_.extended_parameters; + prim->set_attr(attr); + primitive_c = prim->GetPrim(); + if (primitive_c == nullptr) { + MS_LOG(ERROR) << "failed to create primitive: custom"; + return RET_ERROR; + } + + auto operator_cnode = func_graph->NewCNode(primitive_c, op_inputs); + MSLITE_CHECK_PTR(operator_cnode); + operator_cnode->set_fullname_with_scope("Custom"); + *operator_node = operator_cnode; + return status; +} + +STATUS ThirdPartyModelParser::BuildGraphOutputs(const FuncGraphPtr &func_graph, const CNodePtr &operator_node) { + MS_ASSERT(operator_node != nullptr && func_graph != nullptr); + + auto dtypes = param_.output_dtypes; + auto shapes = param_.output_shapes; + auto names = param_.output_names; + + auto output_size = dtypes.size(); + std::vector<AnfNodePtr> output_nodes = {}; + + // Use TupleGetItem to wrap op outputs.
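+  // Each model output i is exposed as TupleGetItem(custom_node, i). The Custom node itself receives a tuple
+  // abstract built from the configured output shapes/dtypes, while each getitem node only carries a minimal
+  // float32 abstract here; the final output dims are written back from the config by GraphDefTransform
+  // (see FillGraphOutputShape above).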
+ AbstractBasePtrList abstract_list; + for (size_t i = 0; i < output_size; i++) { + auto abstract_tensor = CreateTensorAbstract(shapes[i], dtypes[i]); + if (abstract_tensor == nullptr) { + MS_LOG(ERROR) << "Create tensor abstract failed"; + return RET_ERROR; + } + abstract_list.emplace_back(abstract_tensor); + auto tuple_get_item_prim_ptr = std::make_shared<ops::TupleGetItem>(); + if (tuple_get_item_prim_ptr == nullptr) { + MS_LOG(ERROR) << "new TupleGetItem failed"; + return RET_NULL_PTR; + } + auto tuple_get_item_prim_c = tuple_get_item_prim_ptr->GetPrim(); + MSLITE_CHECK_PTR(tuple_get_item_prim_c); + auto tuple_get_item_prim = NewValueNode(tuple_get_item_prim_c); + MSLITE_CHECK_PTR(tuple_get_item_prim); + auto get_item_value = NewValueNode(MakeValue<int64_t>(i)); + MSLITE_CHECK_PTR(get_item_value); + std::vector<AnfNodePtr> inputs = {tuple_get_item_prim, operator_node, get_item_value}; + CNodePtr get_item_cnode = func_graph->NewCNode(inputs); + MSLITE_CHECK_PTR(get_item_cnode); + std::string output_item_name = operator_node->fullname_with_scope() + "_getitem_" + std::to_string(i); + auto get_item_abstract = CreateTensorAbstract({}, kNumberTypeFloat32); + if (get_item_abstract == nullptr) { + MS_LOG(ERROR) << "Create tensor abstract failed"; + return RET_ERROR; + } + get_item_cnode->set_fullname_with_scope(output_item_name); + get_item_cnode->set_abstract(get_item_abstract); + output_nodes.push_back(get_item_cnode); + } + auto abstract_tuple = std::make_shared<abstract::AbstractTuple>(abstract_list); + MSLITE_CHECK_PTR(abstract_tuple); + operator_node->set_abstract(abstract_tuple); + + // Use MakeTuple node to wrap all outputs as single input of Return node. + auto make_tuple_prim_ptr = std::make_shared<ops::MakeTuple>(); + if (make_tuple_prim_ptr == nullptr) { + MS_LOG(ERROR) << "new MakeTuple failed"; + return RET_NULL_PTR; + } + auto make_tuple_prim_c = make_tuple_prim_ptr->GetPrim(); + MSLITE_CHECK_PTR(make_tuple_prim_c); + auto make_tuple_prim = NewValueNode(make_tuple_prim_c); + MSLITE_CHECK_PTR(make_tuple_prim); + std::vector<AnfNodePtr> make_tuple_inputs = output_nodes; + make_tuple_inputs.insert(make_tuple_inputs.begin(), make_tuple_prim); + auto make_tuple_cnode = func_graph->NewCNode(make_tuple_inputs); + MSLITE_CHECK_PTR(make_tuple_cnode); + make_tuple_cnode->set_fullname_with_scope("return_tuple"); + + auto return_prim_ptr = std::make_shared<ops::Return>(); + if (return_prim_ptr == nullptr) { + MS_LOG(ERROR) << "new Return failed"; + return RET_NULL_PTR; + } + auto return_prim_c = return_prim_ptr->GetPrim(); + MSLITE_CHECK_PTR(return_prim_c); + std::vector<AnfNodePtr> op_inputs{make_tuple_cnode}; + auto cnode = func_graph->NewCNode(return_prim_c, op_inputs); + MSLITE_CHECK_PTR(cnode); + cnode->set_fullname_with_scope("Return"); + func_graph->set_return(cnode); + + // Save original output tensor names. + ConverterInnerContext::GetInstance()->SetGraphOutputTensorNames(names); + return RET_OK; +} + +REG_MODEL_PARSER(kFmkTypeThirdParty, LiteModelParserCreator<ThirdPartyModelParser>) +} // namespace lite +} // namespace mindspore diff --git a/mindspore/lite/tools/converter/parser/third_party/third_party_model_parser.h b/mindspore/lite/tools/converter/parser/third_party/third_party_model_parser.h new file mode 100644 index 00000000..c4b197b8 --- /dev/null +++ b/mindspore/lite/tools/converter/parser/third_party/third_party_model_parser.h @@ -0,0 +1,50 @@ +/** + * Copyright 2023 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_LITE_TOOLS_CONVERTER_PARSER_THIRDPARTY_THIRDPARTY_MODEL_PARSER_H_ +#define MINDSPORE_LITE_TOOLS_CONVERTER_PARSER_THIRDPARTY_THIRDPARTY_MODEL_PARSER_H_ + +#include <string> +#include <vector> +#include "schema/inner/model_generated.h" +#include "base/base.h" +#include "ir/anf.h" +#include "ir/func_graph.h" +#include "include/errorcode.h" +#include "include/registry/model_parser.h" +#include "tools/converter/config_parser/third_party_param_parser.h" + +namespace mindspore { +namespace lite { +class ThirdPartyModelParser : public converter::ModelParser { + public: + api::FuncGraphPtr Parse(const converter::ConverterParameters &flag) override; + + private: + STATUS InitConfig(const std::string &config_file); + api::FuncGraphPtr CreateFuncGraph(); + STATUS BuildGraphInputs(const FuncGraphPtr &func_graph, std::vector<AnfNodePtr> *op_inputs); + STATUS BuildCustomOp(const FuncGraphPtr &func_graph, const std::vector<AnfNodePtr> &op_inputs, + CNodePtr *operator_node); + STATUS BuildGraphOutputs(const FuncGraphPtr &func_graph, const CNodePtr &operator_node); + + std::string model_file_ = ""; + ThirdPartyModelParam param_; +}; +} // namespace lite +} // namespace mindspore + +#endif // MINDSPORE_LITE_TOOLS_CONVERTER_PARSER_THIRDPARTY_THIRDPARTY_MODEL_PARSER_H_ diff --git a/mindspore/lite/tools/converter/registry/model_parser_registry.cc b/mindspore/lite/tools/converter/registry/model_parser_registry.cc index 832fb92d..6bc2d4d3 100644 --- a/mindspore/lite/tools/converter/registry/model_parser_registry.cc +++ b/mindspore/lite/tools/converter/registry/model_parser_registry.cc @@ -26,7 +26,7 @@ std::map<FmkType, ModelParserCreator> model_parser_room; } // namespace ModelParserRegistry::ModelParserRegistry(FmkType fmk, ModelParserCreator creator) { - if (fmk < converter::kFmkTypeTf || fmk > converter::kFmkTypePytorch) { + if (fmk < converter::kFmkTypeTf || fmk >= converter::kFmkTypeEnd) { MS_LOG(ERROR) << "ILLEGAL FMK: fmk must be in FmkType."; return; } @@ -38,7 +38,7 @@ ModelParserRegistry::ModelParserRegistry(FmkType fmk, ModelParserCreator creator } converter::ModelParser *ModelParserRegistry::GetModelParser(FmkType fmk) { - if (fmk < converter::kFmkTypeTf || fmk > converter::kFmkTypePytorch) { + if (fmk < converter::kFmkTypeTf || fmk >= converter::kFmkTypeEnd) { MS_LOG(ERROR) << "ILLEGAL FMK: fmk must be in FmkType."; return nullptr; } -- 2.17.1
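For reference, a sketch of the config section that ThirdPartyParamParser consumes. The section and key names are assumptions inferred from the GetThirdPartyModelString() fields and are not taken from this patch; the ";" separator between tensors and the "key:value" form of extended_parameters come from the parsing code above, and the comma-separated shape syntax is likewise assumed:

    [third_party_model]
    input_names=in_0;in_1
    input_dtypes=float32;float32
    input_shapes=1,3,224,224;1,16
    input_formats=NHWC;NHWC
    output_names=out_0
    output_dtypes=float32
    output_shapes=1,1000
    output_formats=NHWC
    extended_parameters=key_foo:value_foo;key_bar:value_bar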