1From 6576d2861ab7e95c7e7b6a284ebc5f3159f4398d Mon Sep 17 00:00:00 2001 2From: z00574805 <z00574805@notesmail.huawei.com/> 3Date: Wed, 24 May 2023 11:04:47 +0800 4Subject: [PATCH 2/5] xiaoyi-0002 5 6--- 7 mindspore/lite/tools/converter/converter.cc | 2 +- 8 .../converter/micro/cmake/file_list.cmake | 13 + 9 .../micro/coder/allocator/allocator.cc | 19 +- 10 .../micro/coder/allocator/allocator.h | 9 +- 11 .../lite/tools/converter/micro/coder/coder.cc | 8 +- 12 .../lite/tools/converter/micro/coder/coder.h | 4 +- 13 .../lite/tools/converter/micro/coder/graph.cc | 30 +- 14 .../lite/tools/converter/micro/coder/graph.h | 2 +- 15 .../nnacl/fp16/activation_fp16_coder.cc | 84 +++++ 16 .../nnacl/fp16/activation_fp16_coder.h | 37 +++ 17 .../nnacl/fp16/arithmetic_fp16_coder.cc | 175 +++++++++++ 18 .../nnacl/fp16/arithmetic_fp16_coder.h | 47 +++ 19 .../nnacl/fp16/avg_pooling_fp16_coder.cc | 87 ++++++ 20 .../nnacl/fp16/avg_pooling_fp16_coder.h | 36 +++ 21 .../opcoders/nnacl/fp16/concat_fp16_coder.cc | 88 ++++++ 22 .../opcoders/nnacl/fp16/concat_fp16_coder.h | 42 +++ 23 .../nnacl/fp16/layernorm_fp16_coder.cc | 56 ++++ 24 .../nnacl/fp16/layernorm_fp16_coder.h | 37 +++ 25 .../nnacl/fp16/matmul_fp16_base_coder.cc | 286 ++++++++++++++++++ 26 .../nnacl/fp16/matmul_fp16_base_coder.h | 58 ++++ 27 .../opcoders/nnacl/fp16/matmul_fp16_coder.cc | 79 +++++ 28 .../opcoders/nnacl/fp16/matmul_fp16_coder.h | 44 +++ 29 .../opcoders/nnacl/fp16/reduce_fp16_coder.cc | 75 +++++ 30 .../opcoders/nnacl/fp16/reduce_fp16_coder.h | 40 +++ 31 .../opcoders/nnacl/fp16/resize_fp16_coder.cc | 108 +++++++ 32 .../opcoders/nnacl/fp16/resize_fp16_coder.h | 41 +++ 33 .../nnacl/fp16/transpose_fp16_coder.cc | 140 +++++++++ 34 .../nnacl/fp16/transpose_fp16_coder.h | 43 +++ 35 .../nnacl/fp32/activation_fp32_coder.cc | 8 +- 36 .../nnacl/fp32/activation_fp32_coder.h | 2 +- 37 .../nnacl/fp32/arithmetic_fp32_coder.cc | 38 ++- 38 .../nnacl/fp32/arithmetic_fp32_coder.h | 34 ++- 39 .../opcoders/nnacl/fp32/concat_fp32_coder.h | 
2 +- 40 .../opcoders/nnacl/fp32/exp_fp32_coder.cc | 2 +- 41 .../opcoders/nnacl/fp32/gather_fp32_coder.cc | 2 + 42 .../nnacl/fp32/layernorm_fp32_coder.cc | 81 +++++ 43 .../nnacl/fp32/layernorm_fp32_coder.h | 40 +++ 44 .../opcoders/nnacl/fp32/lstm_fp32_coder.cc | 18 +- 45 .../nnacl/fp32/matmul_fp32_base_coder.cc | 104 ++++--- 46 .../nnacl/fp32/matmul_fp32_base_coder.h | 29 +- 47 .../opcoders/nnacl/fp32/pooling_fp32_coder.h | 2 +- 48 .../opcoders/nnacl/fp32/reduce_fp32_coder.cc | 40 +-- 49 .../opcoders/nnacl/fp32/reduce_fp32_coder.h | 14 +- 50 .../opcoders/nnacl/fp32/resize_fp32_coder.cc | 31 +- 51 .../opcoders/nnacl/fp32/resize_fp32_coder.h | 11 +- 52 .../opcoders/nnacl/fp32/scale_fp32_coder.cc | 38 +-- 53 .../opcoders/nnacl/fp32/scale_fp32_coder.h | 2 - 54 .../opcoders/nnacl/fp32/split_fp32_coder.cc | 77 +++++ 55 .../opcoders/nnacl/fp32/split_fp32_coder.h | 37 +++ 56 .../nnacl/fp32/transpose_fp32_coder.cc | 14 +- 57 .../nnacl/fp32/transpose_fp32_coder.h | 12 +- 58 .../nnacl_serializer/nnacl_fp32_serializer.cc | 19 ++ 59 .../nnacl_serializer/nnacl_fp32_serializer.h | 6 + 60 .../tools/converter/micro/coder/session.cc | 5 +- 61 .../tools/converter/micro/coder/session.h | 3 +- 62 55 files changed, 2140 insertions(+), 221 deletions(-) 63 create mode 100644 mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/activation_fp16_coder.cc 64 create mode 100644 mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/activation_fp16_coder.h 65 create mode 100644 mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/arithmetic_fp16_coder.cc 66 create mode 100644 mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/arithmetic_fp16_coder.h 67 create mode 100644 mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/avg_pooling_fp16_coder.cc 68 create mode 100644 mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/avg_pooling_fp16_coder.h 69 create mode 100644 
mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/concat_fp16_coder.cc 70 create mode 100644 mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/concat_fp16_coder.h 71 create mode 100644 mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/layernorm_fp16_coder.cc 72 create mode 100644 mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/layernorm_fp16_coder.h 73 create mode 100644 mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/matmul_fp16_base_coder.cc 74 create mode 100644 mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/matmul_fp16_base_coder.h 75 create mode 100644 mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/matmul_fp16_coder.cc 76 create mode 100644 mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/matmul_fp16_coder.h 77 create mode 100644 mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/reduce_fp16_coder.cc 78 create mode 100644 mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/reduce_fp16_coder.h 79 create mode 100644 mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/resize_fp16_coder.cc 80 create mode 100644 mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/resize_fp16_coder.h 81 create mode 100644 mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/transpose_fp16_coder.cc 82 create mode 100644 mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/transpose_fp16_coder.h 83 create mode 100644 mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/layernorm_fp32_coder.cc 84 create mode 100644 mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/layernorm_fp32_coder.h 85 create mode 100644 mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/split_fp32_coder.cc 86 create mode 100644 mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/split_fp32_coder.h 87 88diff --git a/mindspore/lite/tools/converter/converter.cc 
b/mindspore/lite/tools/converter/converter.cc 89index eaa18d6b..944ed29c 100644 90--- a/mindspore/lite/tools/converter/converter.cc 91+++ b/mindspore/lite/tools/converter/converter.cc 92@@ -799,7 +799,7 @@ int RunConverter(const std::shared_ptr<ConverterPara> ¶m, void **model_data, 93 if (param->microParam.enable_micro) { 94 status = micro::Coder::MicroSourceCodeGeneration(*meta_graph, param->output_file, param->microParam.codegen_mode, 95 param->microParam.target, param->microParam.support_parallel, 96- param->microParam.debug_mode); 97+ param->microParam.debug_mode, param->weight_fp16); 98 if (status != RET_OK) { 99 delete meta_graph; 100 CONVERTER_LOG_ERROR("MICRO CODEGEN FAILED:" << status << " " << GetErrorInfo(status)); 101diff --git a/mindspore/lite/tools/converter/micro/cmake/file_list.cmake b/mindspore/lite/tools/converter/micro/cmake/file_list.cmake 102index 843b523e..9ae54538 100644 103--- a/mindspore/lite/tools/converter/micro/cmake/file_list.cmake 104+++ b/mindspore/lite/tools/converter/micro/cmake/file_list.cmake 105@@ -66,6 +66,17 @@ set(CODER_OPCODERS_SRC 106 ${MICRO_DIR}/coder/opcoders/cmsis-nn/int8/pooling_int8_coder.cc 107 ${MICRO_DIR}/coder/opcoders/cmsis-nn/int8/reshape_int8_coder.cc 108 ${MICRO_DIR}/coder/opcoders/cmsis-nn/int8/softmax_int8_coder.cc 109+ #### nnacl fp16 coder 110+ ${MICRO_DIR}/coder/opcoders/nnacl/fp16/activation_fp16_coder.cc 111+ ${MICRO_DIR}/coder/opcoders/nnacl/fp16/arithmetic_fp16_coder.cc 112+ ${MICRO_DIR}/coder/opcoders/nnacl/fp16/avg_pooling_fp16_coder.cc 113+ ${MICRO_DIR}/coder/opcoders/nnacl/fp16/concat_fp16_coder.cc 114+ ${MICRO_DIR}/coder/opcoders/nnacl/fp16/transpose_fp16_coder.cc 115+ ${MICRO_DIR}/coder/opcoders/nnacl/fp16/matmul_fp16_coder.cc 116+ ${MICRO_DIR}/coder/opcoders/nnacl/fp16/matmul_fp16_base_coder.cc 117+ ${MICRO_DIR}/coder/opcoders/nnacl/fp16/layernorm_fp16_coder.cc 118+ ${MICRO_DIR}/coder/opcoders/nnacl/fp16/reduce_fp16_coder.cc 119+ ${MICRO_DIR}/coder/opcoders/nnacl/fp16/resize_fp16_coder.cc 120 
#### nnacl fp32 coder 121 ${MICRO_DIR}/coder/opcoders/nnacl/fp32/activation_fp32_coder.cc 122 ${MICRO_DIR}/coder/opcoders/nnacl/fp32/addn_fp32_coder.cc 123@@ -99,6 +110,8 @@ set(CODER_OPCODERS_SRC 124 ${MICRO_DIR}/coder/opcoders/nnacl/fp32/exp_fp32_coder.cc 125 ${MICRO_DIR}/coder/opcoders/nnacl/fp32/deconv2d_fp32_coder.cc 126 ${MICRO_DIR}/coder/opcoders/nnacl/fp32/prelu_fp32_coder.cc 127+ ${MICRO_DIR}/coder/opcoders/nnacl/fp32/layernorm_fp32_coder.cc 128+ ${MICRO_DIR}/coder/opcoders/nnacl/fp32/split_fp32_coder.cc 129 #### nnacl int8 coder 130 ${MICRO_DIR}/coder/opcoders/nnacl/int8/activation_int8_coder.cc 131 ${MICRO_DIR}/coder/opcoders/nnacl/int8/affine_int8_coder.cc 132diff --git a/mindspore/lite/tools/converter/micro/coder/allocator/allocator.cc b/mindspore/lite/tools/converter/micro/coder/allocator/allocator.cc 133index 99ef06de..9c5839b4 100644 134--- a/mindspore/lite/tools/converter/micro/coder/allocator/allocator.cc 135+++ b/mindspore/lite/tools/converter/micro/coder/allocator/allocator.cc 136@@ -22,11 +22,13 @@ 137 138 namespace mindspore::lite::micro { 139 namespace { 140-const std::map<TypeId, size_t> size_map = {{kNumberTypeFloat, sizeof(float)}, {kNumberTypeFloat32, sizeof(float)}, 141- {kNumberTypeInt32, sizeof(int32_t)}, {kNumberTypeInt16, sizeof(int16_t)}, 142- {kNumberTypeInt8, sizeof(int8_t)}, {kNumberTypeUInt8, sizeof(uint8_t)}}; 143+const std::map<TypeId, size_t> size_map = {{kNumberTypeFloat, sizeof(float)}, {kNumberTypeFloat32, sizeof(float)}, 144+ {kNumberTypeInt32, sizeof(int32_t)}, {kNumberTypeInt16, sizeof(int16_t)}, 145+ {kNumberTypeFloat16, sizeof(uint16_t)}, {kNumberTypeInt8, sizeof(int8_t)}, 146+ {kNumberTypeUInt8, sizeof(uint8_t)}}; 147 } 148-void *MemoryAllocator::MallocWeightTensor(TypeId type_id, size_t size, MallocType type) { 149+void *MemoryAllocator::MallocWeightTensor(TypeId type_id, size_t size, MallocType type, 150+ const std::string &tensor_name) { 151 auto item = size_map.find(type_id); 152 MS_CHECK_TRUE_RET_NULL(item != 
size_map.end(), "unsupported type idnex"); 153 154@@ -36,6 +38,7 @@ void *MemoryAllocator::MallocWeightTensor(TypeId type_id, size_t size, MallocTyp 155 auto cate = type == kOfflinePackWeight ? lite::Category::CONST_TENSOR : lite::Category::VAR; 156 Tensor *weight = new (std::nothrow) lite::Tensor(type_id, shape, mindspore::NHWC, cate); 157 MS_CHECK_PTR_RET_NULL(weight); 158+ weight->set_tensor_name(tensor_name); 159 std::string runtime_addr = kWeightPrefixName + std::to_string(weight_index_++); 160 malloc_weights_addr_.insert(std::make_pair(weight, runtime_addr)); 161 if (type == kOfflinePackWeight) { 162@@ -152,4 +155,12 @@ int MemoryAllocator::Assign(const std::vector<Tensor *> &inputs, 163 RecordOriginWeightsAddr(nodes); 164 return AssignTensors(nodes); 165 } 166+ 167+void MemoryAllocator::MarkSharedWeight(const Tensor *src, void *pack_weight) { 168+ shared_pack_weights_[src] = pack_weight; 169+} 170+ 171+void *MemoryAllocator::GetSharedWeightAddr(const Tensor *src) { 172+ return shared_pack_weights_.find(src) == shared_pack_weights_.end() ? 
nullptr : shared_pack_weights_[src]; 173+} 174 } // namespace mindspore::lite::micro 175diff --git a/mindspore/lite/tools/converter/micro/coder/allocator/allocator.h b/mindspore/lite/tools/converter/micro/coder/allocator/allocator.h 176index f8decca1..8a1331fb 100644 177--- a/mindspore/lite/tools/converter/micro/coder/allocator/allocator.h 178+++ b/mindspore/lite/tools/converter/micro/coder/allocator/allocator.h 179@@ -65,9 +65,9 @@ class MemoryAllocator { 180 * in view of weight, bias and workspace 181 */ 182 183- void *Malloc(TypeId type_id, size_t size, MallocType type) { 184+ void *Malloc(TypeId type_id, size_t size, MallocType type, const std::string &tensor_name = "") { 185 if (type != kWorkspace) { 186- return MallocWeightTensor(type_id, size, type); 187+ return MallocWeightTensor(type_id, size, type, tensor_name); 188 } 189 if (size == 0 || size >= UINT_MAX) { 190 return nullptr; 191@@ -138,7 +138,9 @@ class MemoryAllocator { 192 std::map<std::string, Tensor *> saved_weights() const { return saved_weights_addr_; } 193 size_t total_buffer_size() const { return tensors_size_ + workspace_size_; } 194 void enable_is_next() { is_next_ = true; } 195- void *MallocWeightTensor(TypeId type_id, size_t size, MallocType type); 196+ void *MallocWeightTensor(TypeId type_id, size_t size, MallocType type, const std::string &tensor_name = ""); 197+ void MarkSharedWeight(const Tensor *src, void *pack_weight); 198+ void *GetSharedWeightAddr(const Tensor *src); 199 200 private: 201 int AssignTensors(const std::vector<std::unique_ptr<OperatorCoder>> &nodes); 202@@ -162,6 +164,7 @@ class MemoryAllocator { 203 std::map<Tensor *, std::string> origin_weights_addr_; 204 std::map<Tensor *, std::string> malloc_weights_addr_; 205 std::map<Tensor *, std::string> tensors_addr_; 206+ std::map<const Tensor *, void *> shared_pack_weights_; 207 }; 208 } // namespace mindspore::lite::micro 209 #endif // MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_ALLOCATOR_ALLOCATOR_H_ 210diff --git 
a/mindspore/lite/tools/converter/micro/coder/coder.cc b/mindspore/lite/tools/converter/micro/coder/coder.cc 211index 9711d4b9..cca4687e 100644 212--- a/mindspore/lite/tools/converter/micro/coder/coder.cc 213+++ b/mindspore/lite/tools/converter/micro/coder/coder.cc 214@@ -29,13 +29,13 @@ 215 #include "tools/converter/micro/coder/generator/component/component.h" 216 217 namespace mindspore::lite::micro { 218-int Coder::Run(const void *model_buff, size_t size) { 219+int Coder::Run(const void *model_buff, size_t size, bool enableFp16) { 220 session_ = CreateCoderSession(); 221 if (session_ == nullptr) { 222 MS_LOG(ERROR) << "new session failed while running!"; 223 return RET_ERROR; 224 } 225- STATUS status = session_->Init(model_buff, size); 226+ STATUS status = session_->Init(model_buff, size, enableFp16); 227 if (status != RET_OK) { 228 MS_LOG(ERROR) << "Init session failed!"; 229 return RET_ERROR; 230@@ -94,7 +94,7 @@ bool Coder::InitPath(const std::string &output_path) { 231 232 int Coder::MicroSourceCodeGeneration(const schema::MetaGraphT &graph, const std::string &output_path, 233 const std::string &codegen_mode, const std::string &device, bool support_parallel, 234- bool debug_mode) { 235+ bool debug_mode, bool enableFp16) { 236 flatbuffers::FlatBufferBuilder builder(kFlatbuffersBuilderInitSize); 237 auto offset = schema::MetaGraph::Pack(builder, &graph); 238 builder.Finish(offset); 239@@ -111,7 +111,7 @@ int Coder::MicroSourceCodeGeneration(const schema::MetaGraphT &graph, const std: 240 MS_LOG(ERROR) << "Codegen init Error"; 241 return RET_ERROR; 242 } 243- status = code_gen.Run(builder.GetBufferPointer(), size); 244+ status = code_gen.Run(builder.GetBufferPointer(), size, enableFp16); 245 if (status != RET_OK) { 246 MS_LOG(ERROR) << "Codegen Run Error"; 247 return RET_ERROR; 248diff --git a/mindspore/lite/tools/converter/micro/coder/coder.h b/mindspore/lite/tools/converter/micro/coder/coder.h 249index 42ba153f..96531e6f 100644 250--- 
a/mindspore/lite/tools/converter/micro/coder/coder.h 251+++ b/mindspore/lite/tools/converter/micro/coder/coder.h 252@@ -32,11 +32,11 @@ class Coder final { 253 ~Coder() = default; 254 static int MicroSourceCodeGeneration(const schema::MetaGraphT &graph, const std::string &output_path, 255 const std::string &codegen_mode, const std::string &device, 256- bool support_parallel, bool debug_mode); 257+ bool support_parallel, bool debug_mode, bool enableFp16); 258 259 private: 260 int Init(const std::string code_mode, const std::string target, bool support_parallel, bool debug_mode_) const; 261- int Run(const void *model_buff, size_t size); 262+ int Run(const void *model_buff, size_t size, bool enableFp16); 263 bool InitPath(const std::string &output_path); 264 std::shared_ptr<CoderSession> session_{nullptr}; 265 266diff --git a/mindspore/lite/tools/converter/micro/coder/graph.cc b/mindspore/lite/tools/converter/micro/coder/graph.cc 267index b9fa5f2f..ee45d042 100644 268--- a/mindspore/lite/tools/converter/micro/coder/graph.cc 269+++ b/mindspore/lite/tools/converter/micro/coder/graph.cc 270@@ -28,6 +28,7 @@ 271 #include "securec/include/securec.h" 272 #include "src/common/prim_util.h" 273 #include "src/runtime/lite_model.h" 274+#include "base/float16.h" 275 276 namespace mindspore::lite::micro { 277 CoderGraph::~CoderGraph() { 278@@ -41,7 +42,7 @@ CoderGraph::~CoderGraph() { 279 } 280 } 281 282-int CoderGraph::ConvertTensors() { 283+int CoderGraph::ConvertTensors(bool enableFp16) { 284 if (model_ == nullptr) { 285 MS_LOG(ERROR) << "Graph model is nullptr"; 286 return RET_ERROR; 287@@ -86,14 +87,27 @@ int CoderGraph::ConvertTensors() { 288 if (origin_tensor->nodeType() == NodeType_ValueNode && origin_tensor->data() != nullptr && 289 origin_tensor->data()->size() > 0) { 290 // copy data, this is weight && bias 291- MS_CHECK_TRUE_WITH_EXE(origin_tensor->data()->size() > 0, "invalid meta_tensor data size.", delete dstTensor); 292- auto data_size = 
static_cast<size_t>(origin_tensor->data()->size()); 293- MS_CHECK_RET_CODE_WITH_EXE(dstTensor->MallocData(), "dst tensor malloc data failed!", delete dstTensor); 294- void *dst_data = dstTensor->data(); 295- MS_CHECK_RET_CODE_WITH_EXE(memcpy_s(dst_data, dstTensor->Size(), origin_tensor->data()->data(), data_size), 296- "memcpy_s copy data failed!", delete dstTensor); 297- dstTensor->set_data(dst_data); 298+ if (enableFp16 && origin_data_type == kNumberTypeFloat32) { 299+ dstTensor->set_data_type(kNumberTypeFloat16); 300+ auto data = dstTensor->MutableData(); 301+ MS_CHECK_TRUE_WITH_EXE(data != nullptr, "dst tensor malloc data failed!", delete dstTensor); 302+ auto fp32_data = reinterpret_cast<const float *>(origin_tensor->data()->data()); 303+ auto fp16_data = reinterpret_cast<float16 *>(data); 304+ CHECK_NULL_RETURN(fp32_data); 305+ CHECK_NULL_RETURN(fp16_data); 306+ for (int64_t j = 0; j < dstTensor->ElementsNum(); ++j) { 307+ fp16_data[j] = float16(fp32_data[j]); 308+ } 309+ 310+ } else { 311+ MS_CHECK_RET_CODE_WITH_EXE(memcpy_s(dstTensor->MutableData(), dstTensor->Size(), origin_tensor->data()->data(), 312+ origin_tensor->data()->size()), 313+ "memcpy_s copy data failed!", delete dstTensor); 314+ } 315+ } else if (enableFp16 && origin_data_type == kNumberTypeFloat32) { 316+ dstTensor->set_data_type(kNumberTypeFloat16); 317 } 318+ 319 if (origin_tensor->name() != nullptr) { 320 dstTensor->set_tensor_name(origin_tensor->name()->str()); 321 } 322diff --git a/mindspore/lite/tools/converter/micro/coder/graph.h b/mindspore/lite/tools/converter/micro/coder/graph.h 323index 5fb22f15..35d3cb2b 100644 324--- a/mindspore/lite/tools/converter/micro/coder/graph.h 325+++ b/mindspore/lite/tools/converter/micro/coder/graph.h 326@@ -35,7 +35,7 @@ class CoderGraph { 327 explicit CoderGraph(Model *model) : model_(model) {} 328 ~CoderGraph(); 329 330- int ConvertTensors(); 331+ int ConvertTensors(bool enableFp16); 332 int InitGraphInOutTensors(); 333 334 void SetAllTensors(const 
std::vector<Tensor *> &all_tensors); 335diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/activation_fp16_coder.cc b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/activation_fp16_coder.cc 336new file mode 100644 337index 00000000..0fdf0a7f 338--- /dev/null 339+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/activation_fp16_coder.cc 340@@ -0,0 +1,84 @@ 341+/** 342+ * Copyright 2023 Huawei Technologies Co., Ltd 343+ * 344+ * Licensed under the Apache License, Version 2.0 (the "License"); 345+ * you may not use this file except in compliance with the License. 346+ * You may obtain a copy of the License at 347+ * 348+ * http://www.apache.org/licenses/LICENSE-2.0 349+ * 350+ * Unless required by applicable law or agreed to in writing, software 351+ * distributed under the License is distributed on an "AS IS" BASIS, 352+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 353+ * See the License for the specific language governing permissions and 354+ * limitations under the License. 
355+ */ 356+#include "coder/opcoders/nnacl/fp16/activation_fp16_coder.h" 357+#include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h" 358+#include "coder/opcoders/file_collector.h" 359+ 360+using mindspore::schema::PrimitiveType_Activation; 361+ 362+namespace mindspore::lite::micro::nnacl { 363+int ActivationFP16Coder::Prepare(CoderContext *const context) { 364+ if (input_tensor_->data_type() != kNumberTypeFloat16) { 365+ MS_LOG(INFO) << "Input tensor data type is invalid"; 366+ return RET_INPUT_PARAM_INVALID; 367+ } 368+ return RET_OK; 369+} 370+ 371+int ActivationFP16Coder::DoCode(CoderContext *const context) { 372+ // attribute 373+ auto *activation_parameter = reinterpret_cast<ActivationParameter *>(parameter_); 374+ MS_CHECK_PTR(activation_parameter); 375+ int count = input_tensor_->ElementsNum(); 376+ Collect(context, 377+ { 378+ "nnacl/fp16/activation_fp16.h", 379+ }, 380+ { 381+ "activation_fp16.c", 382+ }); 383+ NNaclFp32Serializer code; 384+ 385+ switch (activation_parameter->type_) { 386+ case schema::ActivationType_RELU: 387+ code.CodeFunction("ReluFp16", input_tensor_, output_tensor_, count); 388+ break; 389+ case schema::ActivationType_RELU6: 390+ code.CodeFunction("Relu6Fp16", input_tensor_, output_tensor_, count); 391+ break; 392+ case schema::ActivationType_LEAKY_RELU: 393+ code.CodeFunction("LReluFp16", input_tensor_, output_tensor_, count, activation_parameter->alpha_); 394+ break; 395+ case schema::ActivationType_SIGMOID: 396+ code.CodeFunction("SigmoidFp16", input_tensor_, output_tensor_, count); 397+ break; 398+ case schema::ActivationType_TANH: 399+ code.CodeFunction("TanhFp16", input_tensor_, output_tensor_, count); 400+ break; 401+ case schema::ActivationType_HSWISH: 402+ code.CodeFunction("HSwishFp16", input_tensor_, output_tensor_, count); 403+ break; 404+ case schema::ActivationType_SWISH: 405+ code.CodeFunction("SwishFp16", input_tensor_, output_tensor_, count); 406+ break; 407+ case 
schema::ActivationType_HSIGMOID: 408+ code.CodeFunction("HSigmoidFp16", input_tensor_, output_tensor_, count); 409+ break; 410+ case schema::ActivationType_ELU: 411+ code.CodeFunction("EluFp16", input_tensor_, output_tensor_, count, activation_parameter->alpha_); 412+ break; 413+ default: 414+ MS_LOG(ERROR) << "Activation type error"; 415+ return RET_ERROR; 416+ } 417+ MS_LOG(DEBUG) << "ActivationFP16Code has been called"; 418+ context->AppendCode(code.str()); 419+ return lite::RET_OK; 420+} 421+ 422+REG_OPERATOR_CODER(kARM32, kNumberTypeFloat16, PrimitiveType_Activation, CPUOpCoderCreator<ActivationFP16Coder>) 423+REG_OPERATOR_CODER(kARM64, kNumberTypeFloat16, PrimitiveType_Activation, CPUOpCoderCreator<ActivationFP16Coder>) 424+} // namespace mindspore::lite::micro::nnacl 425diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/activation_fp16_coder.h b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/activation_fp16_coder.h 426new file mode 100644 427index 00000000..0390991f 428--- /dev/null 429+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/activation_fp16_coder.h 430@@ -0,0 +1,37 @@ 431+/** 432+ * Copyright 2023 Huawei Technologies Co., Ltd 433+ * 434+ * Licensed under the Apache License, Version 2.0 (the "License"); 435+ * you may not use this file except in compliance with the License. 436+ * You may obtain a copy of the License at 437+ * 438+ * http://www.apache.org/licenses/LICENSE-2.0 439+ * 440+ * Unless required by applicable law or agreed to in writing, software 441+ * distributed under the License is distributed on an "AS IS" BASIS, 442+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 443+ * See the License for the specific language governing permissions and 444+ * limitations under the License. 
445+ */ 446+ 447+#ifndef MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_ACTIVATION_FP16_CODER_H_ 448+#define MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_ACTIVATION_FP16_CODER_H_ 449+ 450+#include <vector> 451+#include "coder/opcoders/nnacl/fp32/activation_fp32_coder.h" 452+ 453+namespace mindspore::lite::micro::nnacl { 454+class ActivationFP16Coder final : public ActivationFP32Coder { 455+ public: 456+ ActivationFP16Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors, 457+ const LiteGraph::Node *node, size_t node_index, Target target) 458+ : ActivationFP32Coder(in_tensors, out_tensors, node, node_index, target) {} 459+ 460+ ~ActivationFP16Coder() override = default; 461+ 462+ int Prepare(CoderContext *const context) override; 463+ 464+ int DoCode(CoderContext *const context) override; 465+}; 466+} // namespace mindspore::lite::micro::nnacl 467+#endif // MICRO_CODER_OPCODERS_FP16__CODER_H_ 468diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/arithmetic_fp16_coder.cc b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/arithmetic_fp16_coder.cc 469new file mode 100644 470index 00000000..a9cdde78 471--- /dev/null 472+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/arithmetic_fp16_coder.cc 473@@ -0,0 +1,175 @@ 474+/** 475+ * Copyright 2023 Huawei Technologies Co., Ltd 476+ * 477+ * Licensed under the Apache License, Version 2.0 (the "License"); 478+ * you may not use this file except in compliance with the License. 479+ * You may obtain a copy of the License at 480+ * 481+ * http://www.apache.org/licenses/LICENSE-2.0 482+ * 483+ * Unless required by applicable law or agreed to in writing, software 484+ * distributed under the License is distributed on an "AS IS" BASIS, 485+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
486+ * See the License for the specific language governing permissions and 487+ * limitations under the License. 488+ */ 489+#include "coder/opcoders/nnacl/fp16/arithmetic_fp16_coder.h" 490+#include "coder/opcoders/file_collector.h" 491+#include "coder/opcoders/parallel.h" 492+#include "coder/log.h" 493+#include "nnacl/broadcast_to_parameter.h" 494+ 495+namespace mindspore::lite::micro::nnacl { 496+void ArithmeticFP16Coder::InitFunTable() { 497+ fun_table_ = { 498+ {PrimitiveType_MulFusion, schema::ActivationType_RELU, "ElementMulReluFp16", "", "", "", ""}, 499+ {PrimitiveType_MulFusion, schema::ActivationType_RELU6, "ElementMulRelu6Fp16", "", "", "", ""}, 500+ {PrimitiveType_MulFusion, schema::ActivationType_NO_ACTIVATION, "ElementMulFp16", "", "", "", ""}, 501+ {PrimitiveType_AddFusion, schema::ActivationType_RELU, "ElementAddReluFp16", "", "", "", ""}, 502+ {PrimitiveType_AddFusion, schema::ActivationType_RELU6, "ElementAddRelu6Fp16", "", "", "", ""}, 503+ {PrimitiveType_AddFusion, schema::ActivationType_NO_ACTIVATION, "ElementAddFp16", "", "", "", ""}, 504+ {PrimitiveType_SubFusion, schema::ActivationType_RELU, "ElementSubReluFp16", "", "", "", ""}, 505+ {PrimitiveType_SubFusion, schema::ActivationType_RELU6, "ElementSubRelu6Fp16", "", "", "", ""}, 506+ {PrimitiveType_SubFusion, schema::ActivationType_NO_ACTIVATION, "ElementSubFp16", "", "", "", ""}, 507+ {PrimitiveType_DivFusion, schema::ActivationType_RELU, "ElementDivReluFp16", "", "", "", ""}, 508+ {PrimitiveType_DivFusion, schema::ActivationType_RELU6, "ElementDivRelu6Fp16", "", "", "", ""}, 509+ {PrimitiveType_DivFusion, schema::ActivationType_NO_ACTIVATION, "ElementDivFp16", "", "", "", ""}, 510+ {PrimitiveType_RealDiv, schema::ActivationType_RELU, "ElementDivReluFp16", "", "", "", ""}, 511+ {PrimitiveType_RealDiv, schema::ActivationType_RELU6, "ElementDivRelu6Fp16", "", "", "", ""}, 512+ {PrimitiveType_RealDiv, schema::ActivationType_NO_ACTIVATION, "ElementDivFp16", "", "", "", ""}, 513+ 
{PrimitiveType_LogicalAnd, schema::ActivationType_NO_ACTIVATION, "ElementLogicalAndFp16", "", "", "", ""}, 514+ {PrimitiveType_LogicalOr, schema::ActivationType_NO_ACTIVATION, "ElementLogicalOrFp16", "", "", "", ""}, 515+ {PrimitiveType_Maximum, schema::ActivationType_NO_ACTIVATION, "ElementMaximumFp16", "", "", "", ""}, 516+ {PrimitiveType_Minimum, schema::ActivationType_NO_ACTIVATION, "ElementMinimumFp16", "", "", "", ""}, 517+ {PrimitiveType_FloorMod, schema::ActivationType_NO_ACTIVATION, "ElementFloorModFp16", "", "", "", ""}, 518+ {PrimitiveType_FloorDiv, schema::ActivationType_NO_ACTIVATION, "ElementFloorDivFp16", "", "", "", ""}, 519+ {PrimitiveType_SquaredDifference, schema::ActivationType_NO_ACTIVATION, "ElementSquaredDifferenceFp16", "", "", "", 520+ ""}}; 521+} 522+ 523+int ArithmeticFP16Coder::Prepare(CoderContext *const context) { 524+ if (input_tensor_->data_type() != kNumberTypeFloat16 || 525+ input_tensors_.at(kWeightIndex)->data_type() != kNumberTypeFloat16 || 526+ output_tensor_->data_type() != kNumberTypeFloat16) { 527+ MS_LOG(ERROR) << "Tensor data type is invalid"; 528+ return lite::RET_INPUT_PARAM_INVALID; 529+ } 530+ return ArithmeticFP32Coder::Prepare(context); 531+} 532+ 533+int ArithmeticFP16Coder::ReSize(CoderContext *const context) { 534+ CalcMultiplesAndStrides(arithmetic_parameter_); 535+ return RET_OK; 536+} 537+ 538+int ArithmeticFP16Coder::ExecuteCode(const std::string &input0, const std::string &input1, const std::string &output, 539+ int size, CoderContext *const context, NNaclFp32Serializer *const code) { 540+ if (arithmetic_func_str_.empty()) { 541+ return RET_ERROR; 542+ } 543+ for (size_t i = 0; i < fun_table_.size(); i++) { 544+ if (fun_table_[i].primitive_type_ == arithmetic_parameter_->op_parameter_.type_ && 545+ fun_table_[i].activation_type_ == arithmetic_parameter_->activation_type_) { 546+ code->CodeFunction(fun_table_[i].func_, input0, input1, output, size); 547+ break; 548+ } 549+ } 550+ 
context->AppendCode(code->str()); 551+ return RET_OK; 552+} 553+ 554+int ArithmeticFP16Coder::DoCode(CoderContext *const context) { 555+ int element_num = output_tensor_->ElementsNum(); 556+ input0_ptr_str_ = allocator_->GetRuntimeAddr(input_tensor_, input_tensor_->IsConst()); 557+ input1_ptr_str_ = allocator_->GetRuntimeAddr(filter_tensor_, filter_tensor_->IsConst()); 558+ output_ptr_str_ = allocator_->GetRuntimeAddr(output_tensor_); 559+ NNaclFp32Serializer code; 560+ Collect(context, 561+ { 562+ "nnacl/fp16/arithmetic_fp16.h", 563+ "nnacl/base/broadcast_to.h", 564+ }, 565+ { 566+ "arithmetic_fp16.c", 567+ "arithmetic_base.c", 568+ "broadcast_to.c", 569+ }); 570+ 571+ // all elements eltwise calculation 572+ ChooseArithmeticFunc(false); 573+ auto in0_shape = input_tensor_->shape(); 574+ auto in1_shape = filter_tensor_->shape(); 575+ auto out_shape = output_tensor_->shape(); 576+ BroadcastShapeInfo broadcast_info; 577+ auto ret = memset_s(&broadcast_info, sizeof(BroadcastShapeInfo), 0, sizeof(BroadcastShapeInfo)); 578+ MS_CHECK_TRUE_MSG(ret == EOK, RET_ERROR, "memset failed."); 579+ ret = memcpy_s(broadcast_info.output_shape_, MAX_SHAPE_SIZE * sizeof(int), out_shape.data(), 580+ out_shape.size() * sizeof(int)); 581+ MS_CHECK_TRUE_MSG(ret == EOK, RET_ERROR, "memcpy output-info failed."); 582+ broadcast_info.output_shape_size_ = static_cast<int>(out_shape.size()); 583+ if (in0_shape != out_shape) { 584+ ret = memcpy_s(broadcast_info.input_shape_, MAX_SHAPE_SIZE * sizeof(int), in0_shape.data(), 585+ in0_shape.size() * sizeof(int)); 586+ MS_CHECK_TRUE_MSG(ret == EOK, RET_ERROR, "memcpy in0-info failed."); 587+ broadcast_info.input_shape_size_ = static_cast<int>(in0_shape.size()); 588+ code.CodeStruct("in0_broadcast_info", broadcast_info); 589+ code.CodeFunction("BroadcastToSize16", input0_ptr_str_, "&in0_broadcast_info", output_ptr_str_); 590+ input0_ptr_str_ = output_ptr_str_; 591+ } 592+ if (in1_shape != out_shape) { 593+ ret = memcpy_s(broadcast_info.input_shape_, 
MAX_SHAPE_SIZE * sizeof(int), in1_shape.data(), 594+ in1_shape.size() * sizeof(int)); 595+ MS_CHECK_TRUE_MSG(ret == EOK, RET_ERROR, "memcpy in1-info failed."); 596+ broadcast_info.input_shape_size_ = static_cast<int>(in1_shape.size()); 597+ code.CodeStruct("in1_broadcast_info", broadcast_info); 598+ auto temp = output_ptr_str_; 599+ if (input0_ptr_str_ == output_ptr_str_) { 600+ auto temp_data = allocator_->Malloc(kNumberTypeFloat16, output_tensor_->Size(), kWorkspace); 601+ MS_CHECK_TRUE_MSG(temp_data != nullptr, RET_NULL_PTR, "malloc running buffer failed."); 602+ temp = allocator_->GetRuntimeAddr(temp_data); 603+ } 604+ code.CodeFunction("BroadcastToSize16", input1_ptr_str_, "&in1_broadcast_info", temp); 605+ input1_ptr_str_ = temp; 606+ } 607+ return ExecuteCode(input0_ptr_str_, input1_ptr_str_, output_ptr_str_, element_num, context, &code); 608+} 609+ 610+REG_OPERATOR_CODER(kARM32, kNumberTypeFloat16, PrimitiveType_AddFusion, CPUOpCoderCreator<ArithmeticFP16Coder>) 611+REG_OPERATOR_CODER(kARM32, kNumberTypeFloat16, PrimitiveType_MulFusion, CPUOpCoderCreator<ArithmeticFP16Coder>) 612+REG_OPERATOR_CODER(kARM32, kNumberTypeFloat16, PrimitiveType_SubFusion, CPUOpCoderCreator<ArithmeticFP16Coder>) 613+REG_OPERATOR_CODER(kARM32, kNumberTypeFloat16, PrimitiveType_DivFusion, CPUOpCoderCreator<ArithmeticFP16Coder>) 614+REG_OPERATOR_CODER(kARM32, kNumberTypeFloat16, PrimitiveType_RealDiv, CPUOpCoderCreator<ArithmeticFP16Coder>) 615+REG_OPERATOR_CODER(kARM32, kNumberTypeFloat16, PrimitiveType_LogicalAnd, CPUOpCoderCreator<ArithmeticFP16Coder>) 616+REG_OPERATOR_CODER(kARM32, kNumberTypeFloat16, PrimitiveType_LogicalOr, CPUOpCoderCreator<ArithmeticFP16Coder>) 617+REG_OPERATOR_CODER(kARM32, kNumberTypeFloat16, PrimitiveType_Maximum, CPUOpCoderCreator<ArithmeticFP16Coder>) 618+REG_OPERATOR_CODER(kARM32, kNumberTypeFloat16, PrimitiveType_Minimum, CPUOpCoderCreator<ArithmeticFP16Coder>) 619+REG_OPERATOR_CODER(kARM32, kNumberTypeFloat16, PrimitiveType_FloorDiv, 
CPUOpCoderCreator<ArithmeticFP16Coder>) 620+REG_OPERATOR_CODER(kARM32, kNumberTypeFloat16, PrimitiveType_FloorMod, CPUOpCoderCreator<ArithmeticFP16Coder>) 621+REG_OPERATOR_CODER(kARM32, kNumberTypeFloat16, PrimitiveType_SquaredDifference, CPUOpCoderCreator<ArithmeticFP16Coder>) 622+REG_OPERATOR_CODER(kARM32, kNumberTypeFloat16, PrimitiveType_Equal, CPUOpCoderCreator<ArithmeticFP16Coder>) 623+REG_OPERATOR_CODER(kARM32, kNumberTypeFloat16, PrimitiveType_NotEqual, CPUOpCoderCreator<ArithmeticFP16Coder>) 624+REG_OPERATOR_CODER(kARM32, kNumberTypeFloat16, PrimitiveType_Less, CPUOpCoderCreator<ArithmeticFP16Coder>) 625+REG_OPERATOR_CODER(kARM32, kNumberTypeFloat16, PrimitiveType_LessEqual, CPUOpCoderCreator<ArithmeticFP16Coder>) 626+REG_OPERATOR_CODER(kARM32, kNumberTypeFloat16, PrimitiveType_Greater, CPUOpCoderCreator<ArithmeticFP16Coder>) 627+REG_OPERATOR_CODER(kARM32, kNumberTypeFloat16, PrimitiveType_GreaterEqual, CPUOpCoderCreator<ArithmeticFP16Coder>) 628+REG_OPERATOR_CODER(kARM32, kNumberTypeFloat16, PrimitiveType_Eltwise, CPUOpCoderCreator<ArithmeticFP16Coder>) 629+REG_OPERATOR_CODER(kARM64, kNumberTypeFloat16, PrimitiveType_AddFusion, CPUOpCoderCreator<ArithmeticFP16Coder>) 630+REG_OPERATOR_CODER(kARM64, kNumberTypeFloat16, PrimitiveType_MulFusion, CPUOpCoderCreator<ArithmeticFP16Coder>) 631+REG_OPERATOR_CODER(kARM64, kNumberTypeFloat16, PrimitiveType_SubFusion, CPUOpCoderCreator<ArithmeticFP16Coder>) 632+REG_OPERATOR_CODER(kARM64, kNumberTypeFloat16, PrimitiveType_DivFusion, CPUOpCoderCreator<ArithmeticFP16Coder>) 633+REG_OPERATOR_CODER(kARM64, kNumberTypeFloat16, PrimitiveType_RealDiv, CPUOpCoderCreator<ArithmeticFP16Coder>) 634+REG_OPERATOR_CODER(kARM64, kNumberTypeFloat16, PrimitiveType_LogicalAnd, CPUOpCoderCreator<ArithmeticFP16Coder>) 635+REG_OPERATOR_CODER(kARM64, kNumberTypeFloat16, PrimitiveType_LogicalOr, CPUOpCoderCreator<ArithmeticFP16Coder>) 636+REG_OPERATOR_CODER(kARM64, kNumberTypeFloat16, PrimitiveType_Maximum, 
CPUOpCoderCreator<ArithmeticFP16Coder>) 637+REG_OPERATOR_CODER(kARM64, kNumberTypeFloat16, PrimitiveType_Minimum, CPUOpCoderCreator<ArithmeticFP16Coder>) 638+REG_OPERATOR_CODER(kARM64, kNumberTypeFloat16, PrimitiveType_FloorDiv, CPUOpCoderCreator<ArithmeticFP16Coder>) 639+REG_OPERATOR_CODER(kARM64, kNumberTypeFloat16, PrimitiveType_FloorMod, CPUOpCoderCreator<ArithmeticFP16Coder>) 640+REG_OPERATOR_CODER(kARM64, kNumberTypeFloat16, PrimitiveType_SquaredDifference, CPUOpCoderCreator<ArithmeticFP16Coder>) 641+REG_OPERATOR_CODER(kARM64, kNumberTypeFloat16, PrimitiveType_Equal, CPUOpCoderCreator<ArithmeticFP16Coder>) 642+REG_OPERATOR_CODER(kARM64, kNumberTypeFloat16, PrimitiveType_NotEqual, CPUOpCoderCreator<ArithmeticFP16Coder>) 643+REG_OPERATOR_CODER(kARM64, kNumberTypeFloat16, PrimitiveType_Less, CPUOpCoderCreator<ArithmeticFP16Coder>) 644+REG_OPERATOR_CODER(kARM64, kNumberTypeFloat16, PrimitiveType_LessEqual, CPUOpCoderCreator<ArithmeticFP16Coder>) 645+REG_OPERATOR_CODER(kARM64, kNumberTypeFloat16, PrimitiveType_Greater, CPUOpCoderCreator<ArithmeticFP16Coder>) 646+REG_OPERATOR_CODER(kARM64, kNumberTypeFloat16, PrimitiveType_GreaterEqual, CPUOpCoderCreator<ArithmeticFP16Coder>) 647+REG_OPERATOR_CODER(kARM64, kNumberTypeFloat16, PrimitiveType_Eltwise, CPUOpCoderCreator<ArithmeticFP16Coder>) 648+} // namespace mindspore::lite::micro::nnacl 649diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/arithmetic_fp16_coder.h b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/arithmetic_fp16_coder.h 650new file mode 100644 651index 00000000..60a83419 652--- /dev/null 653+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/arithmetic_fp16_coder.h 654@@ -0,0 +1,47 @@ 655+/** 656+ * Copyright 2023 Huawei Technologies Co., Ltd 657+ * 658+ * Licensed under the Apache License, Version 2.0 (the "License"); 659+ * you may not use this file except in compliance with the License. 
660+ * You may obtain a copy of the License at 661+ * 662+ * http://www.apache.org/licenses/LICENSE-2.0 663+ * 664+ * Unless required by applicable law or agreed to in writing, software 665+ * distributed under the License is distributed on an "AS IS" BASIS, 666+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 667+ * See the License for the specific language governing permissions and 668+ * limitations under the License. 669+ */ 670+ 671+#ifndef MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_ARITHMETIC_FP16_CODER_H_ 672+#define MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_ARITHMETIC_FP16_CODER_H_ 673+ 674+#include <vector> 675+#include <string> 676+#include "coder/opcoders/nnacl/fp32/arithmetic_fp32_coder.h" 677+#include "nnacl/base/cast_base.h" 678+#include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h" 679+namespace mindspore::lite::micro::nnacl { 680+class ArithmeticFP16Coder final : public ArithmeticFP32Coder { 681+ public: 682+ ArithmeticFP16Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors, 683+ const LiteGraph::Node *node, size_t node_index, Target target) 684+ : ArithmeticFP32Coder(in_tensors, out_tensors, node, node_index, target) {} 685+ 686+ ~ArithmeticFP16Coder() override = default; 687+ 688+ int DoCode(CoderContext *const context) override; 689+ 690+ private: 691+ int Prepare(CoderContext *const context) override; 692+ 693+ int ReSize(CoderContext *const context) override; 694+ 695+ void InitFunTable() override; 696+ 697+ int ExecuteCode(const std::string &input0, const std::string &input1, const std::string &output, int size, 698+ CoderContext *const context, NNaclFp32Serializer *const code); 699+}; 700+} // namespace mindspore::lite::micro::nnacl 701+#endif // MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_ARITHMETIC_FP16_CODER_H_ 702diff --git 
a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/avg_pooling_fp16_coder.cc b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/avg_pooling_fp16_coder.cc 703new file mode 100644 704index 00000000..97ca75a8 705--- /dev/null 706+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/avg_pooling_fp16_coder.cc 707@@ -0,0 +1,87 @@ 708+/** 709+ * Copyright 2023 Huawei Technologies Co., Ltd 710+ * 711+ * Licensed under the Apache License, Version 2.0 (the "License"); 712+ * you may not use this file except in compliance with the License. 713+ * You may obtain a copy of the License at 714+ * 715+ * http://www.apache.org/licenses/LICENSE-2.0 716+ * 717+ * Unless required by applicable law or agreed to in writing, software 718+ * distributed under the License is distributed on an "AS IS" BASIS, 719+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 720+ * See the License for the specific language governing permissions and 721+ * limitations under the License. 
722+ */ 723+#include "coder/opcoders/nnacl/fp16/avg_pooling_fp16_coder.h" 724+#include <cfloat> 725+#include <string> 726+#include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h" 727+#include "coder/log.h" 728+#include "coder/opcoders/parallel.h" 729+#include "coder/opcoders/file_collector.h" 730+ 731+using mindspore::schema::PrimitiveType_AvgPoolFusion; 732+ 733+namespace mindspore::lite::micro::nnacl { 734+int PoolingFP16Coder::Prepare(CoderContext *const context) { 735+ if (input_tensor_->data_type() != kNumberTypeFloat16) { 736+ MS_LOG(INFO) << "Input tensor data type is invalid"; 737+ return RET_INPUT_PARAM_INVALID; 738+ } 739+ return RET_OK; 740+} 741+ 742+int PoolingFP16Coder::DoCode(CoderContext *const context) { 743+ // attribute 744+ auto pooling_parameter = reinterpret_cast<PoolingParameter *>(parameter_); 745+ MS_CHECK_PTR(pooling_parameter); 746+ // init struct PoolingParameters 747+ pooling_parameter->input_batch_ = input_tensor_->Batch(); 748+ pooling_parameter->input_channel_ = input_tensor_->Channel(); 749+ pooling_parameter->input_h_ = input_tensor_->Height(); 750+ pooling_parameter->input_w_ = input_tensor_->Width(); 751+ pooling_parameter->output_batch_ = output_tensor_->Batch(); 752+ pooling_parameter->output_channel_ = output_tensor_->Channel(); 753+ pooling_parameter->output_h_ = output_tensor_->Height(); 754+ pooling_parameter->output_w_ = output_tensor_->Width(); 755+ 756+ pooling_parameter->thread_num_ = pooling_parameter->op_parameter_.thread_num_; 757+ 758+ NNaclFp32Serializer code; 759+ std::string param_name = "pooling_parameter"; 760+ code.CodeStruct(param_name, *pooling_parameter); 761+ float minf = -FLT16_MAX; 762+ float maxf = FLT16_MAX; 763+ Collect(context, 764+ { 765+ "nnacl/fp16/pooling_fp16.h", 766+ }, 767+ { 768+ "pooling_fp16.c", 769+ }); 770+ switch (pooling_parameter->act_type_) { 771+ case ActType_Relu: { 772+ minf = 0.f; 773+ break; 774+ } 775+ case ActType_Relu6: { 776+ minf = 0.f; 777+ maxf = 6.f; 
778+ break; 779+ } 780+ default: { 781+ MS_LOG(INFO) << "no actype"; 782+ break; 783+ } 784+ } 785+ code.CodeFunction("AvgPoolingFp16", input_tensor_, output_tensor_, "&pooling_parameter", kDefaultTaskId, minf, maxf); 786+ 787+ MS_LOG(INFO) << "PoolingFp16Code has been called"; 788+ context->AppendCode(code.str()); 789+ return lite::RET_OK; 790+} 791+ 792+REG_OPERATOR_CODER(kARM32, kNumberTypeFloat16, PrimitiveType_AvgPoolFusion, CPUOpCoderCreator<PoolingFP16Coder>) 793+REG_OPERATOR_CODER(kARM64, kNumberTypeFloat16, PrimitiveType_AvgPoolFusion, CPUOpCoderCreator<PoolingFP16Coder>) 794+} // namespace mindspore::lite::micro::nnacl 795diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/avg_pooling_fp16_coder.h b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/avg_pooling_fp16_coder.h 796new file mode 100644 797index 00000000..65a6522d 798--- /dev/null 799+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/avg_pooling_fp16_coder.h 800@@ -0,0 +1,36 @@ 801+/** 802+ * Copyright 2023 Huawei Technologies Co., Ltd 803+ * 804+ * Licensed under the Apache License, Version 2.0 (the "License"); 805+ * you may not use this file except in compliance with the License. 806+ * You may obtain a copy of the License at 807+ * 808+ * http://www.apache.org/licenses/LICENSE-2.0 809+ * 810+ * Unless required by applicable law or agreed to in writing, software 811+ * distributed under the License is distributed on an "AS IS" BASIS, 812+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 813+ * See the License for the specific language governing permissions and 814+ * limitations under the License. 
815+ */ 816+ 817+#ifndef MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_POOLING_FP16_CODER_H_ 818+#define MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_POOLING_FP16_CODER_H_ 819+ 820+#include <vector> 821+#include "coder/opcoders/nnacl/fp32/pooling_fp32_coder.h" 822+ 823+namespace mindspore::lite::micro::nnacl { 824+class PoolingFP16Coder final : public PoolingFP32Coder { 825+ public: 826+ PoolingFP16Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors, 827+ const LiteGraph::Node *node, size_t node_index, Target target) 828+ : PoolingFP32Coder(in_tensors, out_tensors, node, node_index, target) {} 829+ ~PoolingFP16Coder() override = default; 830+ 831+ int Prepare(CoderContext *const context) override; 832+ 833+ int DoCode(CoderContext *const context) override; 834+}; 835+} // namespace mindspore::lite::micro::nnacl 836+#endif  // MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_POOLING_FP16_CODER_H_ 837diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/concat_fp16_coder.cc b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/concat_fp16_coder.cc 838new file mode 100644 839index 00000000..fd969963 840--- /dev/null 841+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/concat_fp16_coder.cc 842@@ -0,0 +1,88 @@ 843+/** 844+ * Copyright 2023 Huawei Technologies Co., Ltd 845+ * 846+ * Licensed under the Apache License, Version 2.0 (the "License"); 847+ * you may not use this file except in compliance with the License. 848+ * You may obtain a copy of the License at 849+ * 850+ * http://www.apache.org/licenses/LICENSE-2.0 851+ * 852+ * Unless required by applicable law or agreed to in writing, software 853+ * distributed under the License is distributed on an "AS IS" BASIS, 854+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 855+ * See the License for the specific language governing permissions and 856+ * limitations under the License. 
857+ */ 858+#include "coder/opcoders/nnacl/fp16/concat_fp16_coder.h" 859+#include <string> 860+#include <vector> 861+#include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h" 862+#include "coder/opcoders/file_collector.h" 863+#include "coder/opcoders/parallel.h" 864+ 865+using mindspore::schema::PrimitiveType_Concat; 866+ 867+namespace mindspore::lite::micro::nnacl { 868+int ConcatFP16Coder::Prepare(CoderContext *const context) { 869+ if (input_tensor_->data_type() != kNumberTypeFloat16) { 870+ MS_LOG(INFO) << "Input tensor data type is invalid"; 871+ return lite::RET_INPUT_PARAM_INVALID; 872+ } 873+ concat_param_ = reinterpret_cast<ConcatParameter *>(parameter_); 874+ MS_CHECK_PTR(concat_param_); 875+ return ReSize(); 876+} 877+ 878+int ConcatFP16Coder::ReSize() { 879+ axis_ = concat_param_->axis_ >= 0 ? concat_param_->axis_ 880+ : static_cast<int>(input_tensor_->shape().size()) + concat_param_->axis_; 881+ return RET_OK; 882+} 883+ 884+int ConcatFP16Coder::DoCode(CoderContext *const context) { 885+ Collect(context, 886+ { 887+ "nnacl/base/concat_base.h", 888+ }, 889+ { 890+ "concat_base.c", 891+ }); 892+ 893+ size_t input_num = input_tensors_.size(); 894+ 895+ NNaclFp32Serializer code; 896+ code << "\t\tvoid *inputs_addr[] = {"; 897+ for (size_t i = 0; i < input_num; ++i) { 898+ code << allocator_->GetRuntimeAddr(input_tensors_.at(i)) << ", "; 899+ } 900+ code << "};\n"; 901+ 902+ size_t i; 903+ for (i = 0; i < input_num; ++i) { 904+ code << "\t\tint shape_" << i << "[] = {"; 905+ for (auto &shape : input_tensors_.at(i)->shape()) { 906+ code << shape << ", "; 907+ } 908+ code << "};\n"; 909+ } 910+ 911+ code << "\t\tint shape_" << i << "[] = {"; 912+ for (auto &shape : output_tensor_->shape()) { 913+ code << shape << ", "; 914+ } 915+ code << "};\n"; 916+ 917+ code << "\t\tint *inputs_output_shape[] = {"; 918+ for (i = 0; i <= input_num; ++i) { 919+ code << "shape_" << i << ", "; 920+ } 921+ code << "};\n"; 922+ code.CodeFunction("Concat", 
"inputs_addr", input_num, axis_, "inputs_output_shape", output_tensor_->shape().size(), 923+ output_tensor_, 0, 1, sizeof(uint16_t)); 924+ context->AppendCode(code.str()); 925+ return RET_OK; 926+} 927+ 928+REG_OPERATOR_CODER(kARM32, kNumberTypeFloat16, PrimitiveType_Concat, CPUOpCoderCreator<ConcatFP16Coder>) 929+REG_OPERATOR_CODER(kARM64, kNumberTypeFloat16, PrimitiveType_Concat, CPUOpCoderCreator<ConcatFP16Coder>) 930+} // namespace mindspore::lite::micro::nnacl 931diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/concat_fp16_coder.h b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/concat_fp16_coder.h 932new file mode 100644 933index 00000000..6428ac6f 934--- /dev/null 935+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/concat_fp16_coder.h 936@@ -0,0 +1,42 @@ 937+/** 938+ * Copyright 2023 Huawei Technologies Co., Ltd 939+ * 940+ * Licensed under the Apache License, Version 2.0 (the "License"); 941+ * you may not use this file except in compliance with the License. 942+ * You may obtain a copy of the License at 943+ * 944+ * http://www.apache.org/licenses/LICENSE-2.0 945+ * 946+ * Unless required by applicable law or agreed to in writing, software 947+ * distributed under the License is distributed on an "AS IS" BASIS, 948+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 949+ * See the License for the specific language governing permissions and 950+ * limitations under the License. 
951+ */ 952+ 953+#ifndef MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_CONCAT_FP16_CODER_H_ 954+#define MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_CONCAT_FP16_CODER_H_ 955+ 956+#include <vector> 957+#include "coder/opcoders/nnacl/fp32/concat_fp32_coder.h" 958+#include "nnacl/concat_parameter.h" 959+ 960+namespace mindspore::lite::micro::nnacl { 961+class ConcatFP16Coder final : public ConcatFP32Coder { 962+ public: 963+ ConcatFP16Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors, 964+ const LiteGraph::Node *node, size_t node_index, Target target) 965+ : ConcatFP32Coder(in_tensors, out_tensors, node, node_index, target) {} 966+ ~ConcatFP16Coder() override = default; 967+ 968+ int Prepare(CoderContext *const context) override; 969+ int DoCode(CoderContext *const context) override; 970+ 971+ private: 972+ int ReSize(); 973+ 974+ int axis_{0}; 975+ ConcatParameter *concat_param_{nullptr}; 976+}; 977+} // namespace mindspore::lite::micro::nnacl 978+#endif // MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_CONCAT_FP16_CODER_H_ 979diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/layernorm_fp16_coder.cc b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/layernorm_fp16_coder.cc 980new file mode 100644 981index 00000000..8140786b 982--- /dev/null 983+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/layernorm_fp16_coder.cc 984@@ -0,0 +1,56 @@ 985+/** 986+ * Copyright 2023 Huawei Technologies Co., Ltd 987+ * 988+ * Licensed under the Apache License, Version 2.0 (the "License"); 989+ * you may not use this file except in compliance with the License. 
990+ * You may obtain a copy of the License at 991+ * 992+ * http://www.apache.org/licenses/LICENSE-2.0 993+ * 994+ * Unless required by applicable law or agreed to in writing, software 995+ * distributed under the License is distributed on an "AS IS" BASIS, 996+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 997+ * See the License for the specific language governing permissions and 998+ * limitations under the License. 999+ */ 1000+#include "coder/opcoders/nnacl/fp16/layernorm_fp16_coder.h" 1001+#include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h" 1002+#include "coder/opcoders/file_collector.h" 1003+#include "coder/opcoders/parallel.h" 1004+ 1005+using mindspore::schema::PrimitiveType_LayerNormFusion; 1006+ 1007+namespace mindspore::lite::micro::nnacl { 1008+int LayerNormFP16Coder::Prepare(CoderContext *const context) { 1009+ if ((input_tensor_->data_type() != kNumberTypeFloat16) || 1010+ (input_tensors_.at(SECOND_INPUT)->data_type() != kNumberTypeFloat16) || 1011+ (input_tensors_.at(THIRD_INPUT)->data_type() != kNumberTypeFloat16)) { 1012+ MS_LOG(INFO) << "Input tensors data type is invalid"; 1013+ return RET_INPUT_PARAM_INVALID; 1014+ } 1015+ return LayerNormFP32Coder::Prepare(context); 1016+} 1017+ 1018+int LayerNormFP16Coder::DoCode(CoderContext *const context) { 1019+ NNaclFp32Serializer code; 1020+ code.CodeStruct("layer_norm_parm", *param_); 1021+ Collect(context, {"nnacl/fp16/layer_norm_fp16.h"}, {"layer_norm_fp16.c"}); 1022+ 1023+ if (output_tensors_.size() == C3NUM) { 1024+ code.CodeFunction("LayerNormFp16", input_tensor_, input_tensors_.at(SECOND_INPUT), input_tensors_.at(THIRD_INPUT), 1025+ output_tensor_, output_tensors_.at(SECOND_INPUT), output_tensors_.at(THIRD_INPUT), 1026+ "&layer_norm_parm", 0); 1027+ } else if (output_tensors_.size() == 1) { 1028+ code.CodeFunction("LayerNormFp16", input_tensor_, input_tensors_.at(SECOND_INPUT), input_tensors_.at(THIRD_INPUT), 1029+ output_tensor_, "NULL", 
"NULL", "&layer_norm_parm", 0); 1030+ } else { 1031+ MS_LOG(ERROR) << "LayerNorm should have 1 or 3 output tensors"; 1032+ return RET_ERROR; 1033+ } 1034+ context->AppendCode(code.str()); 1035+ return RET_OK; 1036+} 1037+ 1038+REG_OPERATOR_CODER(kARM32, kNumberTypeFloat16, PrimitiveType_LayerNormFusion, CPUOpCoderCreator<LayerNormFP16Coder>) 1039+REG_OPERATOR_CODER(kARM64, kNumberTypeFloat16, PrimitiveType_LayerNormFusion, CPUOpCoderCreator<LayerNormFP16Coder>) 1040+} // namespace mindspore::lite::micro::nnacl 1041diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/layernorm_fp16_coder.h b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/layernorm_fp16_coder.h 1042new file mode 100644 1043index 00000000..df025e3c 1044--- /dev/null 1045+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/layernorm_fp16_coder.h 1046@@ -0,0 +1,37 @@ 1047+/** 1048+ * Copyright 2023 Huawei Technologies Co., Ltd 1049+ * 1050+ * Licensed under the Apache License, Version 2.0 (the "License"); 1051+ * you may not use this file except in compliance with the License. 1052+ * You may obtain a copy of the License at 1053+ * 1054+ * http://www.apache.org/licenses/LICENSE-2.0 1055+ * 1056+ * Unless required by applicable law or agreed to in writing, software 1057+ * distributed under the License is distributed on an "AS IS" BASIS, 1058+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1059+ * See the License for the specific language governing permissions and 1060+ * limitations under the License. 
1061+ */ 1062+ 1063+#ifndef MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_LAYERNORM_FP16_CODER_H_ 1064+#define MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_LAYERNORM_FP16_CODER_H_ 1065+ 1066+#include <vector> 1067+#include "coder/opcoders/nnacl/fp32/layernorm_fp32_coder.h" 1068+#include "nnacl/layer_norm_parameter.h" 1069+ 1070+namespace mindspore::lite::micro::nnacl { 1071+class LayerNormFP16Coder final : public LayerNormFP32Coder { 1072+ public: 1073+ LayerNormFP16Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors, 1074+ const LiteGraph::Node *node, size_t node_index, Target target) 1075+ : LayerNormFP32Coder(in_tensors, out_tensors, node, node_index, target) {} 1076+ ~LayerNormFP16Coder() override = default; 1077+ 1078+ int Prepare(CoderContext *const context) override; 1079+ 1080+ int DoCode(CoderContext *const context) override; 1081+}; 1082+} // namespace mindspore::lite::micro::nnacl 1083+#endif // MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_LAYERNORM_FP16_CODER_H_ 1084diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/matmul_fp16_base_coder.cc b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/matmul_fp16_base_coder.cc 1085new file mode 100644 1086index 00000000..f2aec9d2 1087--- /dev/null 1088+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/matmul_fp16_base_coder.cc 1089@@ -0,0 +1,286 @@ 1090+/** 1091+ * Copyright 2023 Huawei Technologies Co., Ltd 1092+ * 1093+ * Licensed under the Apache License, Version 2.0 (the "License"); 1094+ * you may not use this file except in compliance with the License. 
1095+ * You may obtain a copy of the License at 1096+ * 1097+ * http://www.apache.org/licenses/LICENSE-2.0 1098+ * 1099+ * Unless required by applicable law or agreed to in writing, software 1100+ * distributed under the License is distributed on an "AS IS" BASIS, 1101+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1102+ * See the License for the specific language governing permissions and 1103+ * limitations under the License. 1104+ */ 1105+ 1106+#include "coder/opcoders/nnacl/fp16/matmul_fp16_base_coder.h" 1107+#include <string> 1108+#include <vector> 1109+#include "coder/log.h" 1110+#include "coder/opcoders/parallel.h" 1111+#include "coder/opcoders/file_collector.h" 1112+#include "coder/opcoders/nnacl/dequant/de_quant.h" 1113+#include "nnacl/base/cast_base.h" 1114+ 1115+using mindspore::schema::PrimitiveType_MatMulFusion; 1116+ 1117+namespace mindspore::lite::micro::nnacl { 1118+int MatMulFP16BaseCoder::InitBiasData() { 1119+ if (bias_ptr_) { 1120+ return RET_OK; 1121+ } 1122+ bias_pack_ptr_size_ = static_cast<size_t>(params_->col_align_ * data_type_size_); 1123+ if (input_tensors_.size() == C3NUM) { 1124+ bias_ptr_ = allocator_->Malloc(kNumberTypeUInt8, kOnlineSize, kOnlinePackWeight, 1125+ bias_tensor_->tensor_name() + "_online_pack"); 1126+ } else { 1127+ bias_ptr_ = 1128+ allocator_->Malloc(kNumberTypeUInt8, kOnlineSize, kOnlinePackWeight, node_->name_ + "_bias_online_pack"); 1129+ } 1130+ return RET_OK; 1131+} 1132+ 1133+int MatMulFP16BaseCoder::InitBufferA() { 1134+ if (a_pack_ptr_ != nullptr || vec_matmul_) { 1135+ return RET_OK; 1136+ } 1137+ a_pack_ptr_size_ = static_cast<size_t>(params_->batch * params_->row_align_ * params_->deep_ * sizeof(uint16_t)); 1138+ if (params_->a_const_) { 1139+ a_pack_ptr_ = allocator_->GetSharedWeightAddr(input_tensors_.at(0)); 1140+ if (a_pack_ptr_ == nullptr) { 1141+ a_pack_ptr_ = allocator_->Malloc(kNumberTypeFloat16, kOnlineSize, kOnlinePackWeight, 1142+ input_tensors_.at(0)->tensor_name() + 
"_online_pack"); 1143+ allocator_->MarkSharedWeight(input_tensors_.at(0), a_pack_ptr_); 1144+ } else { 1145+ a_packed_ = true; 1146+ } 1147+ } else { 1148+ a_pack_ptr_ = allocator_->Malloc(kNumberTypeFloat16, a_pack_ptr_size_, kWorkspace); 1149+ } 1150+ MS_CHECK_PTR(a_pack_ptr_); 1151+ return RET_OK; 1152+} 1153+ 1154+int MatMulFP16BaseCoder::InitBufferB() { 1155+ if (target_ != kARM64) { 1156+ if (vec_matmul_ && params_->b_transpose_) { 1157+ return RET_OK; 1158+ } 1159+ } 1160+ return MatMulFP32BaseCoder::InitBufferB(); 1161+} 1162+ 1163+std::string MatMulFP16BaseCoder::InitMatrixA(NNaclFp32Serializer *const code, NNaclFp32Serializer *const init_code, 1164+ CoderContext *const context, size_t *w_buf) { 1165+ if (vec_matmul_) { 1166+ return allocator_->GetRuntimeAddr(input_tensor_, input_tensor_->IsConst()); 1167+ } 1168+ std::string input_a_str = allocator_->GetRuntimeAddr(input_tensor_); 1169+ std::string input_a_pack_str = "(float16_t *)" + allocator_->GetRuntimeAddr(a_pack_ptr_); 1170+ if (params_->a_const_) { 1171+ init_code->CodeBufferOffsetExpression(a_pack_ptr_, context->weight_name(), context->weight_offset_name(), 1172+ context->weight_size_name(), a_pack_ptr_size_); 1173+ *w_buf = *w_buf + a_pack_ptr_size_; 1174+ } 1175+ NNaclFp32Serializer &pack_node = params_->a_const_ ? 
*init_code : *code; 1176+ if (a_batch_ == 1) { 1177+ if (params_->a_transpose_) { 1178+ if (target_ == kARM64) { 1179+ pack_node.CodeFunction("RowMajor2RowNMajorFp16", input_a_str, input_a_pack_str, params_->deep_, params_->row_); 1180+ } else { 1181+ pack_node.CodeFunction("RowMajor2Row12MajorFp16", input_a_str, input_a_pack_str, params_->deep_, params_->row_, 1182+ false); 1183+ } 1184+ } else { 1185+ if (target_ == kARM64) { 1186+ pack_node.CodeFunction("RowMajor2ColNMajorFp16", input_a_str, input_a_pack_str, params_->row_, params_->deep_); 1187+ } else { 1188+ pack_node.CodeFunction("RowMajor2Col12MajorFp16", input_a_str, input_a_pack_str, params_->row_, params_->deep_, 1189+ false); 1190+ } 1191+ } 1192+ } else { 1193+ pack_node << " for (int i = 0; i < " << a_batch_ << "; ++i) {\n" 1194+ << " float16_t *src = " << input_a_str << " + i * " << params_->deep_ * params_->row_ << ";\n" 1195+ << " float16_t *dst = " << input_a_pack_str << " + i * " << params_->deep_ * params_->row_align_ 1196+ << ";\n"; 1197+ if (params_->a_transpose_) { 1198+ if (target_ == kARM64) { 1199+ pack_node << " RowMajor2RowNMajorFp16(src, dst, " << params_->deep_ << ", " << params_->row_ << ");\n"; 1200+ } else { 1201+ pack_node << " RowMajor2Row12MajorFp16(src, dst, " << params_->deep_ << ", " << params_->row_ 1202+ << ", false);\n"; 1203+ } 1204+ } else { 1205+ if (target_ == kARM64) { 1206+ pack_node << " RowMajor2ColNMajorFp16(src, dst, " << params_->row_ << ", " << params_->deep_ << ");\n"; 1207+ } else { 1208+ pack_node << " RowMajor2Col12MajorFp16(src, dst, " << params_->row_ << ", " << params_->deep_ 1209+ << ", false);\n"; 1210+ } 1211+ } 1212+ pack_node << " }\n"; 1213+ } 1214+ return input_a_pack_str; 1215+} 1216+ 1217+std::string MatMulFP16BaseCoder::InitMatrixB(NNaclFp32Serializer *const code, NNaclFp32Serializer *const init_code, 1218+ CoderContext *const context, size_t *w_buf) { 1219+ bool no_pack = target_ != kARM64 && vec_matmul_ && params_->b_transpose_; 1220+ if 
(no_pack) { 1221+ return allocator_->GetRuntimeAddr(filter_tensor_, filter_tensor_->IsConst()); 1222+ } 1223+ std::string input_b_str = allocator_->GetRuntimeAddr(filter_tensor_); 1224+ std::string input_b_pack_str = "(float16_t *)" + allocator_->GetRuntimeAddr(b_pack_ptr_); 1225+ if (params_->b_const_) { 1226+ init_code->CodeBufferOffsetExpression(b_pack_ptr_, context->weight_name(), context->weight_offset_name(), 1227+ context->weight_size_name(), b_pack_ptr_size_); 1228+ *w_buf = *w_buf + b_pack_ptr_size_; 1229+ } 1230+ NNaclFp32Serializer &pack_node = params_->b_const_ ? *init_code : *code; 1231+ if (target_ != kARM64) { 1232+ if (vec_matmul_) { 1233+ if (b_batch_ == 1) { 1234+ pack_node.CodeFunction("RowMajor2ColMajorFp16", input_b_str, input_b_pack_str, params_->deep_, params_->col_, 1235+ false); 1236+ } else { 1237+ pack_node << " for (int i = 0; i < " << b_batch_ << "; ++i) {\n" 1238+ << " float16_t *src = " << input_b_str << " + i * " << params_->deep_ * params_->col_ << ";\n" 1239+ << " float16_t *dst = " << input_b_pack_str << " + i * " << params_->deep_ * params_->col_ << ";\n" 1240+ << " RowMajor2ColMajorFp16(src, dst, " << params_->deep_ << ", " << params_->col_ << ", " 1241+ << "false);\n" 1242+ << " }\n"; 1243+ } 1244+ return input_b_pack_str; 1245+ } 1246+ } 1247+ 1248+ if (b_batch_ == 1) { 1249+ if (params_->b_transpose_) { 1250+ pack_node.CodeFunction("RowMajor2Col8MajorFp16", input_b_str, input_b_pack_str, params_->col_, params_->deep_, 1251+ false); 1252+ } else { 1253+ pack_node.CodeFunction("RowMajor2Row8MajorFp16", input_b_str, input_b_pack_str, params_->deep_, params_->col_, 1254+ false); 1255+ } 1256+ } else { 1257+ pack_node << " for (int i = 0; i < " << b_batch_ << "; ++i) {\n" 1258+ << " float16_t *src = " << input_b_str << " + i * " << params_->deep_ * params_->col_ << ";\n" 1259+ << " float16_t *dst = " << input_b_pack_str << " + i * " << params_->deep_ * params_->col_align_ 1260+ << ";\n"; 1261+ if (params_->b_transpose_) { 1262+ 
// NOTE(review): the lines below are the tail of the matrix-B pack-code emitter;
// the head of that function lies before this chunk and is reproduced unchanged.
// It emits either a col-major or row-major fp16 repack call into the generated
// source, then returns the name of the generated pack buffer.
      pack_node << " RowMajor2Col8MajorFp16(src, dst, " << params_->col_ << ", " << params_->deep_ << ", false);\n";
    } else {
      pack_node << " RowMajor2Row8MajorFp16(src, dst, " << params_->deep_ << ", " << params_->col_ << ", false);\n";
    }
    pack_node << " }\n";
  }
  return input_b_pack_str;
}

// Validates that both inputs are fp16, picks the row tile for the target ISA,
// fills A/B geometry via the subclass hooks, and allocates the packed
// A / B / bias buffers.  Returns RET_INPUT_PARAM_INVALID for non-fp16 inputs.
int MatMulFP16BaseCoder::Prepare(CoderContext *const context) {
  if (input_tensor_->data_type() != kNumberTypeFloat16 || filter_tensor_->data_type() != kNumberTypeFloat16) {
    MS_LOG(INFO) << "Input tensor data type is invalid";
    return RET_INPUT_PARAM_INVALID;
  }
  row_tile_ = C12NUM;
  if (target_ == kARM64) {
    row_tile_ = C4NUM;
  }
  auto ret = InitAShape();
  MS_CHECK_TRUE_MSG(ret == RET_OK, RET_ERROR, "init A-metrics' info failed");
  ret = InitBShape();
  MS_CHECK_TRUE_MSG(ret == RET_OK, RET_ERROR, "init B-metrics' info failed");
  // A single output row degenerates the matmul into a matrix-vector product.
  if (params_->row_ == 1) {
    vec_matmul_ = true;
  }
  if (vec_matmul_) {
    params_->row_align_ = 1;
    // On ARM64 the vector kernel consumes B packed to a multiple of 8 columns.
    params_->col_align_ = (target_ == kARM64) ? UP_ROUND(params_->col_, C8NUM) : params_->col_;
  } else {
    params_->row_align_ = UP_ROUND(params_->row_, row_tile_);
    params_->col_align_ = UP_ROUND(params_->col_, C8NUM);
  }
  MS_CHECK_RET_CODE(InitBufferA(), "InitBufferA failed");
  MS_CHECK_RET_CODE(InitBufferB(), "InitBufferB failed");
  MS_CHECK_RET_CODE(InitBiasData(), "InitBias failed");
  return RET_OK;
}

// Registers the fp16 pack/matmul C sources plus the per-target assembly
// kernels the generated code links against.
int MatMulFP16BaseCoder::CollectFilesForTarget(CoderContext *const context) {
  Collect(context,
          {
            "nnacl/fp16/pack_fp16.h",
            "nnacl/fp16/matmul_fp16.h",
          },
          {
            "pack_fp16.c",
            "matmul_fp16.c",
          });
  if (target_ == kARM32) {
    Collect(context, {}, {},
            {
              "Matmul12x8Fp16.S",
              "MatVecMulFp16.S",
            });
  } else if (target_ == kARM64) {
    Collect(context, {}, {},
            {
              "MatmulFp16.S",
              "MatmulFp16Opt.S",
              "MatVecMulFp16.S",
              "Matmul12X16Fp16.S",
              "MatmulBaseFp16Neon.S",
              "MatmulWinogradFp16.S",
              "VecMatmulFp16.S",
            });
  }
  return RET_OK;
}

// Emits the batched matmul loop into the runtime code section and the
// bias/weight packing into the init section.  `init_code` runs once at model
// load; `code` runs per inference.
int MatMulFP16BaseCoder::DoCode(CoderContext *const context) {
  CollectFilesForTarget(context);  // NOTE(review): status ignored — always RET_OK in this file; confirm base class.
  NNaclFp32Serializer code, init_code;
  size_t w_buf_size = 0;

  // do bias packing to init
  init_code.CodeBufferOffsetExpression(bias_ptr_, context->weight_name(), context->weight_offset_name(),
                                       context->weight_size_name(), bias_pack_ptr_size_);
  w_buf_size += bias_pack_ptr_size_;
  std::string bias_str = "(float16_t *)" + allocator_->GetRuntimeAddr(bias_ptr_);
  // Three inputs means an explicit bias tensor; otherwise the packed bias
  // buffer is zero-filled.
  if (input_tensors_.size() == DIMENSION_3D) {
    auto origin_bias_str = allocator_->GetRuntimeAddr(bias_tensor_);
    init_code.CodeFunction("memcpy", bias_str, origin_bias_str, bias_tensor_->Size());
  } else {
    init_code.CodeFunction("memset", bias_str, 0, bias_pack_ptr_size_);
  }

  auto input_a_str = InitMatrixA(&code, &init_code, context, &w_buf_size);
  auto input_b_str = InitMatrixB(&code, &init_code, context, &w_buf_size);
  auto output_str = allocator_->GetRuntimeAddr(output_tensor_);
  code << " for (int i = 0; i < " << params_->batch << "; ++i) {\n";
  if (vec_matmul_) {
    // Matrix-vector path: B stride depends on whether B was col-aligned (ARM64).
    code << " const float16_t *batch_a_ptr = " << input_a_str << " + i * " << params_->deep_ << ";\n";
    code << " const float16_t *batch_b_ptr = " << input_b_str << " + i * "
         << params_->deep_ * (target_ == kARM64 ? params_->col_align_ : params_->col_) << ";\n";
    code << " float16_t *batch_c_ptr = " << output_str << " + i * " << params_->row_ * params_->col_ << ";\n ";
    code.CodeFunction(target_ == kARM64 ? "VecMatmulFp16" : "MatVecMulFp16", "batch_a_ptr", "batch_b_ptr",
                      "batch_c_ptr", bias_str, params_->act_type_, params_->deep_, params_->col_);
  } else {
    code << " const float16_t *batch_a_ptr = " << input_a_str << " + i * " << params_->row_align_ * params_->deep_
         << ";\n";
    code << " const float16_t *batch_b_ptr = " << input_b_str << " + i * " << params_->deep_ * params_->col_align_
         << ";\n";
    code << " float16_t *batch_c_ptr = " << output_str << " + i * " << params_->row_ * params_->col_ << ";\n ";
    code.CodeFunction(target_ == kARM64 ? "MatmulBaseFp16Neon" : "MatMulFp16", "batch_a_ptr", "batch_b_ptr",
                      "batch_c_ptr", bias_str, params_->act_type_, params_->deep_, params_->row_, params_->col_,
                      params_->col_, OutType_Nhwc);
  }
  code << " }\n";
  context->AppendInitWeightSizeCode(w_buf_size);
  context->AppendCode(code.str());
  context->AppendInitCode(init_code.str());
  return RET_OK;
}
}  // namespace mindspore::lite::micro::nnacl
diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/matmul_fp16_base_coder.h b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/matmul_fp16_base_coder.h
new file mode 100644
index 00000000..864f54ae
--- /dev/null
+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/matmul_fp16_base_coder.h
@@ -0,0 +1,58 @@
/**
 * Copyright 2023 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_MATMUL_FP16_BASE_CODER_H_
#define MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_MATMUL_FP16_BASE_CODER_H_

#include <vector>
#include "coder/opcoders/nnacl/fp32/matmul_fp32_base_coder.h"
#include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"
#include "nnacl/matmul_parameter.h"

namespace mindspore::lite::micro::nnacl {
// Shared fp16 matmul code generator: packs A/B/bias buffers and emits the
// batched matmul loop.  Concrete subclasses supply A/B shape initialisation.
class MatMulFP16BaseCoder : public MatMulFP32BaseCoder {
 public:
  MatMulFP16BaseCoder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
                      const LiteGraph::Node *node, size_t node_index, Target target)
      : MatMulFP32BaseCoder(in_tensors, out_tensors, node, node_index, target) {}

  ~MatMulFP16BaseCoder() override = default;

  int Prepare(CoderContext *const context) override;

  int DoCode(CoderContext *const context) override;

 private:
  int InitBiasData() override;
  int InitBufferA() override;
  int InitBufferB() override;
  // Emit packing code for matrix A/B; return the runtime address string of the
  // packed buffer used by the generated matmul loop.
  std::string InitMatrixA(NNaclFp32Serializer *const code, NNaclFp32Serializer *const init_code,
                          CoderContext *const context, size_t *w_buf);
  std::string InitMatrixB(NNaclFp32Serializer *const code, NNaclFp32Serializer *const init_code,
                          CoderContext *const context, size_t *w_buf);
  int CollectFilesForTarget(CoderContext *const context) override;

 protected:
  // Subclasses fill the A-/B-side geometry (batch, row/col, deep) in params_.
  virtual int InitAShape() = 0;
  virtual int InitBShape() = 0;

 protected:
  int a_batch_ = 1;
  int b_batch_ = 1;
  int bias_count_ = 0;
};
}  // namespace mindspore::lite::micro::nnacl
#endif  // MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_MATMUL_FP16_BASE_CODER_H_
diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/matmul_fp16_coder.cc
b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/matmul_fp16_coder.cc
new file mode 100644
index 00000000..26a3b923
--- /dev/null
+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/matmul_fp16_coder.cc
@@ -0,0 +1,79 @@
/**
 * Copyright 2023 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "coder/opcoders/nnacl/fp16/matmul_fp16_coder.h"
#include <vector>
#include "coder/log.h"
#include "coder/opcoders/file_collector.h"

using mindspore::schema::PrimitiveType_MatMulFusion;

namespace mindspore::lite::micro::nnacl {
// Derives batch / row / deep from the (possibly transposed) A tensor:
// all leading dims fold into batch, the last two give row x deep.
int MatMulFP16Coder::InitAShape() {
  std::vector<int> a_shape = input_tensor_->shape();
  MS_CHECK_TRUE_MSG(a_shape.size() >= DIMENSION_2D, RET_ERROR, "A-metric tensor's shape is invalid.");
  int batch = 1;
  for (size_t i = 0; i < a_shape.size() - DIMENSION_2D; ++i) {
    batch *= a_shape.at(i);
  }
  a_batch_ = batch;
  params_->batch = batch;
  params_->row_ = params_->a_transpose_ ? a_shape[a_shape.size() - C1NUM] : a_shape[a_shape.size() - C2NUM];
  params_->deep_ = params_->a_transpose_ ? a_shape[a_shape.size() - C2NUM] : a_shape[a_shape.size() - C1NUM];
  params_->row_16_ = UP_ROUND(params_->row_, row_tile_);
  return RET_OK;
}

// Derives batch / col / deep from the (possibly transposed) B tensor.
// NOTE(review): this overwrites params_->batch set by InitAShape — relies on
// A-batch == B-batch; confirm against the base coder's contract.
int MatMulFP16Coder::InitBShape() {
  std::vector<int> b_shape = filter_tensor_->shape();
  MS_CHECK_TRUE_MSG(b_shape.size() >= DIMENSION_2D, RET_ERROR, "B-metric tensor's shape is invalid.");
  int batch = 1;
  for (size_t i = 0; i < b_shape.size() - DIMENSION_2D; ++i) {
    batch *= b_shape[i];
  }
  b_batch_ = batch;
  params_->batch = batch;
  params_->col_ = params_->b_transpose_ ? b_shape[b_shape.size() - C2NUM] : b_shape[b_shape.size() - C1NUM];
  params_->col_8_ = UP_ROUND(params_->col_, C8NUM);
  params_->deep_ = params_->b_transpose_ ? b_shape[b_shape.size() - C1NUM] : b_shape[b_shape.size() - C2NUM];
  return RET_OK;
}

// Resolves the weight/bias tensors and const-ness flags, then delegates the
// heavy lifting (tiling, buffer allocation) to the fp16 base coder.
int MatMulFP16Coder::Prepare(CoderContext *const context) {
  if (input_tensor_->data_type() != kNumberTypeFloat16) {
    MS_LOG(INFO) << "Input tensor data type is invalid";
    return RET_INPUT_PARAM_INVALID;
  }
  params_ = reinterpret_cast<MatMulParameter *>(parameter_);
  MS_CHECK_TRUE_RET(input_tensors_.size() >= kBiasIndex, RET_ERROR);
  filter_tensor_ = input_tensors_.at(kWeightIndex);
  MS_CHECK_PTR(filter_tensor_);
  // A third input, when present, is the bias and must carry const data.
  if (input_tensors_.size() == kInputSize2) {
    bias_tensor_ = input_tensors_.at(kBiasIndex);
    MS_CHECK_PTR(bias_tensor_);
    MS_CHECK_PTR(bias_tensor_->data());
  }
  // A tensor with resident data is treated as a compile-time constant.
  params_->a_const_ = (input_tensor_->data() != nullptr);
  params_->b_const_ = (filter_tensor_->data() != nullptr);
  MS_CHECK_RET_CODE(MatMulFP16BaseCoder::Prepare(context), "MatMulFP16Coder prepare failed");
  return RET_OK;
}

int MatMulFP16Coder::DoCode(CoderContext *const context) { return MatMulFP16BaseCoder::DoCode(context); }

REG_OPERATOR_CODER(kARM64, kNumberTypeFloat16, PrimitiveType_MatMulFusion, CPUOpCoderCreator<MatMulFP16Coder>)
REG_OPERATOR_CODER(kARM32, kNumberTypeFloat16, PrimitiveType_MatMulFusion, CPUOpCoderCreator<MatMulFP16Coder>)
}  // namespace mindspore::lite::micro::nnacl
diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/matmul_fp16_coder.h b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/matmul_fp16_coder.h
new file mode 100644
index 00000000..3a1cb66a
--- /dev/null
+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/matmul_fp16_coder.h
@@ -0,0 +1,44 @@
/**
 * Copyright 2023 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_MATMUL_FP16_CODER_H_
#define MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_MATMUL_FP16_CODER_H_

#include <vector>
#include "coder/opcoders/nnacl/fp16/matmul_fp16_base_coder.h"
#include "nnacl/matmul_parameter.h"

namespace mindspore::lite::micro::nnacl {
// Concrete fp16 MatMulFusion coder: supplies A/B shape initialisation on top
// of MatMulFP16BaseCoder.  Element size is fp16 (2 bytes).
class MatMulFP16Coder final : public MatMulFP16BaseCoder {
 public:
  MatMulFP16Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
                  const LiteGraph::Node *node, size_t node_index, Target target)
      : MatMulFP16BaseCoder(in_tensors, out_tensors, node, node_index, target) {
    data_type_size_ = sizeof(uint16_t);
  }

  ~MatMulFP16Coder() override = default;

  int Prepare(CoderContext *const context) override;

  int DoCode(CoderContext *const context) override;

 private:
  int InitAShape() override;
  int InitBShape() override;
};
}  // namespace mindspore::lite::micro::nnacl
#endif  // MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_MATMUL_FP16_CODER_H_
diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/reduce_fp16_coder.cc b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/reduce_fp16_coder.cc
new file mode 100644
index 00000000..2f289085
--- /dev/null
+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/reduce_fp16_coder.cc
@@ -0,0 +1,75 @@
/**
 * Copyright 2023 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "coder/opcoders/nnacl/fp16/reduce_fp16_coder.h"
#include "coder/opcoders/file_collector.h"

using mindspore::schema::PrimitiveType_ReduceFusion;
namespace mindspore::lite::micro::nnacl {
// Reuses the fp32 reduce coder's axis handling; only switches the working
// data type and temp buffers to fp16.
int ReduceFP16Coder::Prepare(CoderContext *const context) {
  MS_CHECK_RET_CODE(ReduceBaseCoder::Init(), "init failed");
  data_type_ = ::kNumberTypeFloat16;
  MS_CHECK_RET_CODE(ReduceBaseCoder::ReSize(), "resize failed");
  MS_CHECK_RET_CODE(ReduceFP32Coder::MallocTmpBuffer(kNumberTypeFloat16), "malloc buffer failed");
  return RET_OK;
}

// Selects the fp16 reduce kernel name for the requested mode and delegates
// the actual code emission to the inherited GenerateCode.
int ReduceFP16Coder::DoCode(CoderContext *const context) {
  Collect(context,
          {
            "nnacl/fp16/reduce_fp16.h",
          },
          {
            "reduce_fp16.c",
          });

  // call the op function
  switch (mode_) {
    case static_cast<int>(schema::ReduceMode_ReduceSum): {
      reduce_ = "ReduceSumFp16";
      break;
    }
    case static_cast<int>(schema::ReduceMode_ReduceMean): {
      reduce_ = "ReduceMeanFp16";
      break;
    }
    case static_cast<int>(schema::ReduceMode_ReduceMax): {
      reduce_ = "ReduceMaxFp16";
      break;
    }
    case static_cast<int>(schema::ReduceMode_ReduceMin): {
      reduce_ = "ReduceMinFp16";
      break;
    }
    case static_cast<int>(schema::ReduceMode_ReduceProd): {
      reduce_ = "ReduceProdFp16";
      break;
    }
    case static_cast<int>(schema::ReduceMode_ReduceL2): {
      reduce_ = "ReduceL2NormFp16";
      break;
    }
    default:
      MS_LOG(ERROR) << "Reduce unsupported reduce_ mode: " << mode_;
      return RET_ERROR;
  }
  // NOTE(review): GenerateCode's result (if it returns a status) is ignored
  // here — confirm its signature in ReduceFP32Coder.
  GenerateCode(context);
  return RET_OK;
}

REG_OPERATOR_CODER(kARM32, kNumberTypeFloat16, PrimitiveType_ReduceFusion, CPUOpCoderCreator<ReduceFP16Coder>)
REG_OPERATOR_CODER(kARM64, kNumberTypeFloat16, PrimitiveType_ReduceFusion, CPUOpCoderCreator<ReduceFP16Coder>)
}  // namespace mindspore::lite::micro::nnacl
diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/reduce_fp16_coder.h b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/reduce_fp16_coder.h
new file mode 100644
index 00000000..2fcf8fb4
--- /dev/null
+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/reduce_fp16_coder.h
@@ -0,0 +1,40 @@
/**
 * Copyright 2023 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_REDUCE_FP16_CODER_H_
#define MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_REDUCE_FP16_CODER_H_

#include <string>
#include <vector>
#include "coder/opcoders/nnacl/fp32/reduce_fp32_coder.h"
#include "coder/opcoders/base/reduce_base_coder.h"
#include "coder/opcoders/op_coder.h"

namespace mindspore::lite::micro::nnacl {
// fp16 ReduceFusion coder: thin specialisation of the fp32 reduce coder that
// swaps in the fp16 kernels and temp buffers.
class ReduceFP16Coder final : public ReduceFP32Coder {
 public:
  ReduceFP16Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
                  const LiteGraph::Node *node, size_t node_index, Target target)
      : ReduceFP32Coder(in_tensors, out_tensors, node, node_index, target) {}

  ~ReduceFP16Coder() override = default;

  int Prepare(CoderContext *const context) override;

  int DoCode(CoderContext *const context) override;
};
}  // namespace mindspore::lite::micro::nnacl
#endif  // MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_REDUCE_FP16_CODER_H_
diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/resize_fp16_coder.cc b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/resize_fp16_coder.cc
new file mode 100644
index 00000000..b6b07570
--- /dev/null
+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/resize_fp16_coder.cc
@@ -0,0 +1,108 @@
/**
 * Copyright 2023 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "coder/opcoders/nnacl/fp16/resize_fp16_coder.h"
#include <string>
#include <map>
#include <utility>
#include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"
#include "coder/opcoders/file_collector.h"
#include "coder/opcoders/parallel.h"
#include "coder/utils/common.h"
#include "nnacl/fp32/resize_fp32.h"

using mindspore::schema::PrimitiveType_Resize;

namespace mindspore::lite::micro::nnacl {
// fp16 element size; used by the shared fp32 base when sizing buffers.
int ResizeFP16Coder::DataTypeLen() { return sizeof(uint16_t); }

// Emits the resize kernel call for the configured method.  The fp32 base
// computed coordinate tables and float weights; the weights are converted to
// fp16 here and embedded as constant arrays in the generated code.
int ResizeFP16Coder::DoCode(CoderContext *const context) {
  Collect(context,
          {
            "nnacl/fp16/resize_fp16.h",
            "nnacl/fp32/resize_fp32.h",
          },
          {
            "resize_fp16.c",
            "resize_fp32.c",
          });
  nnacl::NNaclFp32Serializer code;
  code.CodeArray("input_shape", input_tensor_->shape().data(), input_tensor_->shape().size(), true);
  code.CodeArray("output_shape", output_tensor_->shape().data(), output_tensor_->shape().size(), true);
  // Convert the precomputed float interpolation weights to fp16 once, at
  // code-generation time.
  std::vector<uint16_t> y_weights(y_weight_len_);
  Float32ToFp16(y_weights_, y_weights.data(), y_weight_len_);
  std::vector<uint16_t> x_weights(x_weight_len_);
  Float32ToFp16(x_weights_, x_weights.data(), x_weight_len_);
  // Single-thread micro codegen: kDefaultTaskId/kDefaultThreadNum partition
  // the output rows [h_begin, h_end).
  int unit = UP_DIV(new_height_, kDefaultThreadNum);
  int h_begin = unit * kDefaultTaskId;
  int h_end = std::min(h_begin + unit, new_height_);
  int channel = input_tensor_->Channel();

  switch (method_) {
    case static_cast<int>(schema::ResizeMethod_LINEAR): {
      code.CodeArray("y_bottoms", coordinate_.y_bottoms_, y_len_, true);
      code.CodeArray("y_tops", coordinate_.y_tops_, y_len_, true);
      code.CodeArray("x_lefts", coordinate_.x_lefts_, x_len_, true);
      code.CodeArray("x_rights", coordinate_.x_rights_, x_len_, true);
      code.CodeArray("y_weights", y_weights.data(), y_weight_len_, true);
      code.CodeArray("x_weights", x_weights.data(), x_weight_len_, true);

      code.CodeFunction("PrepareResizeBilinearFp16", "input_shape", "output_shape", calculate_str_, "(int *)y_bottoms",
                        "(int *)y_tops", "(int *)x_lefts", "(int *)x_rights", "(float16_t *)y_weights",
                        "(float16_t *)x_weights");
      // Two scratch lines per task for the bilinear row interpolation.
      code << " float16_t *line0 = (float16_t *)" << MemoryAllocator::GetInstance()->GetRuntimeAddr(line_buffer_)
           << " + " << new_width_ << " * 2 * " << kDefaultTaskId << ";\n";
      code << " float16_t *line1 = line0 + " << new_width_ << " * " << channel << ";\n";
      code.CodeFunction("ResizeBilinearFp16", input_tensor_, output_tensor_, "input_shape", "output_shape", "y_bottoms",
                        "y_tops", "x_lefts", "x_rights", "(float16_t *)y_weights", "(float16_t *)x_weights", "line0",
                        "line1", h_begin, h_end);
      break;
    }
    case static_cast<int>(schema::ResizeMethod_NEAREST): {
      code.CodeFunction("ResizeNearestNeighborFp16", input_tensor_, output_tensor_, "input_shape", "output_shape",
                        calculate_str_, coordinate_transform_mode_, kDefaultTaskId, kDefaultThreadNum);
      break;
    }
    case static_cast<int>(schema::ResizeMethod_CUBIC): {
      code.CodeArray("y_tops", coordinate_.y_tops_, y_len_, true);
      code.CodeArray("x_lefts", coordinate_.x_lefts_, x_len_, true);
      code.CodeArray("y_weights", y_weights.data(), y_weight_len_, true);
      code.CodeArray("x_weights", x_weights.data(), x_weight_len_, true);
      auto resize_parameter = reinterpret_cast<ResizeParameter *>(parameter_);
      MS_CHECK_PTR(resize_parameter);
      auto cubic_coeff_str = "(float16_t)" + std::to_string(resize_parameter->cubic_coeff_);
      code.CodeFunction("PrepareResizeBicubicFp16", "input_shape", "output_shape", calculate_str_, "(int *)y_tops",
                        "(int *)x_lefts", "(float16_t *)y_weights", "(float16_t *)x_weights", cubic_coeff_str);
      // Bicubic uses 4 scratch lines per task.
      auto buffer_str = "(float16_t *)" + MemoryAllocator::GetInstance()->GetRuntimeAddr(line_buffer_) + " + " +
                        std::to_string(new_width_ * channel * 4 * kDefaultTaskId);

      code.CodeFunction("ResizeBicubicFp16", input_tensor_, output_tensor_, "input_shape", "output_shape", "y_tops",
                        "x_lefts", "(float16_t *)y_weights", "(float16_t *)x_weights", buffer_str, h_begin, h_end);
      break;
    }
    default: {
      MS_LOG(ERROR) << "Resize unknown method " << method_;
      return RET_ERROR;
    }
  }

  context->AppendCode(code.str());
  return RET_OK;
}

REG_OPERATOR_CODER(kARM32, kNumberTypeFloat16, PrimitiveType_Resize, CPUOpCoderCreator<ResizeFP16Coder>)
REG_OPERATOR_CODER(kARM64, kNumberTypeFloat16, PrimitiveType_Resize, CPUOpCoderCreator<ResizeFP16Coder>)
}  // namespace mindspore::lite::micro::nnacl
diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/resize_fp16_coder.h b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/resize_fp16_coder.h
new file mode 100644
index 00000000..0a050f59
--- /dev/null
+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/resize_fp16_coder.h
@@ -0,0 +1,41 @@
/**
 * Copyright 2023 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_RESIZE_FP16_CODER_H_
#define MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_RESIZE_FP16_CODER_H_

#include "coder/opcoders/nnacl/fp32/resize_fp32_coder.h"
#include <vector>
#include <algorithm>
#include <string>
#include "include/errorcode.h"
#include "src/runtime/kernel_exec.h"
#include "nnacl/base/cast_base.h"

namespace mindspore::lite::micro::nnacl {
// fp16 Resize coder: reuses the fp32 base's coordinate/weight preparation and
// overrides only the element size and the kernel emission.
class ResizeFP16Coder : public ResizeFP32Coder {
 public:
  ResizeFP16Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
                  const LiteGraph::Node *node, size_t node_index, Target target)
      : ResizeFP32Coder(in_tensors, out_tensors, node, node_index, target) {}
  ~ResizeFP16Coder() override { FreeTmpBuffer(); };
  int DoCode(CoderContext *const context) override;

 private:
  int DataTypeLen() override;
};
}  // namespace mindspore::lite::micro::nnacl
#endif  // MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_RESIZE_FP16_CODER_H_
diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/transpose_fp16_coder.cc b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/transpose_fp16_coder.cc
new file mode 100644
index 00000000..cddcac53
--- /dev/null
+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/transpose_fp16_coder.cc
@@ -0,0
+1,140 @@ 1869+/** 1870+ * Copyright 2023 Huawei Technologies Co., Ltd 1871+ * 1872+ * Licensed under the Apache License, Version 2.0 (the "License"); 1873+ * you may not use this file except in compliance with the License. 1874+ * You may obtain a copy of the License at 1875+ * 1876+ * http://www.apache.org/licenses/LICENSE-2.0 1877+ * 1878+ * Unless required by applicable law or agreed to in writing, software 1879+ * distributed under the License is distributed on an "AS IS" BASIS, 1880+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1881+ * See the License for the specific language governing permissions and 1882+ * limitations under the License. 1883+ */ 1884+ 1885+#include "coder/opcoders/nnacl/fp16/transpose_fp16_coder.h" 1886+#include <vector> 1887+#include <unordered_set> 1888+#include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h" 1889+#include "coder/opcoders/file_collector.h" 1890+#include "coder/opcoders/parallel.h" 1891+#include "coder/utils/common.h" 1892+ 1893+using mindspore::schema::PrimitiveType_Transpose; 1894+namespace mindspore::lite::micro::nnacl { 1895+int TransposeFp16Coder::Prepare(CoderContext *const context) { 1896+ if (input_tensor_->data_type() != kNumberTypeFloat16) { 1897+ MS_LOG(INFO) << "Input tensor data type is invalid"; 1898+ return lite::RET_INPUT_PARAM_INVALID; 1899+ } 1900+ MS_CHECK_RET_CODE(Init(), "init failed"); 1901+ return RET_OK; 1902+} 1903+ 1904+int TransposeFp16Coder::ResetStatus() { 1905+ param_->num_axes_ = 0; 1906+ if (input_tensors_.size() == C2NUM) { 1907+ param_->num_axes_ = input_tensors_[SECOND_INPUT]->ElementsNum(); 1908+ } 1909+ auto in_shape = input_tensors_[FIRST_INPUT]->shape(); 1910+ if (in_shape.size() > MAX_TRANSPOSE_DIM_SIZE) { 1911+ MS_LOG(ERROR) << "input shape out of range."; 1912+ return RET_ERROR; 1913+ } 1914+ int trans_nd[MAX_TRANSPOSE_DIM_SIZE] = {0, 2, 1}; 1915+ int *perm_data{nullptr}; 1916+ if (in_shape.size() != 
static_cast<size_t>(param_->num_axes_)) { 1917+ perm_data = trans_nd; 1918+ if (in_shape.size() == C3NUM && param_->num_axes_ == C4NUM) { 1919+ param_->num_axes_ = C3NUM; 1920+ } 1921+ if (param_->num_axes_ == 0) { 1922+ for (int i = 0; i < static_cast<int>(in_shape.size()); ++i) { 1923+ trans_nd[i] = static_cast<int>(in_shape.size()) - 1 - i; 1924+ } 1925+ param_->num_axes_ = static_cast<int>(in_shape.size()); 1926+ } 1927+ } else { 1928+ MS_ASSERT(input_tensors_.size() == C2NUM); 1929+ auto perm_tensor = input_tensors_.at(SECOND_INPUT); 1930+ if (perm_tensor->data_type() != kNumberTypeInt32) { 1931+ MS_LOG(ERROR) << "Unsupported type id: " << perm_tensor->data_type() << " of perm tensor."; 1932+ return RET_ERROR; 1933+ } 1934+ perm_data = reinterpret_cast<int *>(perm_tensor->data()); 1935+ MSLITE_CHECK_PTR(perm_data); 1936+ std::vector<int> perm(perm_data, perm_data + input_tensors_[SECOND_INPUT]->ElementsNum()); 1937+ if (perm.size() != std::unordered_set<int>(perm.cbegin(), perm.cend()).size()) { 1938+ MS_LOG(ERROR) << "Invalid perm, the same element exits in perm."; 1939+ return RET_ERROR; 1940+ } 1941+ } 1942+ MS_CHECK_TRUE_MSG(param_->num_axes_ <= MAX_TRANSPOSE_DIM_SIZE, RET_ERROR, "transpose perm is invalid."); 1943+ for (int i = 0; i < param_->num_axes_; ++i) { 1944+ param_->perm_[i] = perm_data[i]; 1945+ } 1946+ return RET_OK; 1947+} 1948+ 1949+int TransposeFp16Coder::ComputeOfflineInfo() { 1950+ auto in_shape = input_tensor_->shape(); 1951+ auto out_shape = output_tensor_->shape(); 1952+ param_->strides_[param_->num_axes_ - 1] = 1; 1953+ param_->out_strides_[param_->num_axes_ - 1] = 1; 1954+ param_->data_num_ = input_tensor_->ElementsNum(); 1955+ for (int i = param_->num_axes_ - 2; i >= 0; i--) { 1956+ param_->strides_[i] = in_shape.at(i + 1) * param_->strides_[i + 1]; 1957+ param_->out_strides_[i] = out_shape.at(i + 1) * param_->out_strides_[i + 1]; 1958+ } 1959+ return RET_OK; 1960+} 1961+ 1962+int TransposeFp16Coder::Resize() { 1963+ auto ret = 
ResetStatus(); 1964+ if (ret != RET_OK) { 1965+ MS_LOG(ERROR) << "Do transpose reset failed."; 1966+ return ret; 1967+ } 1968+ if (input_tensors_[FIRST_INPUT]->shape().size() != static_cast<size_t>(param_->num_axes_)) { 1969+ return RET_OK; 1970+ } 1971+ ret = ComputeOfflineInfo(); 1972+ if (ret != RET_OK) { 1973+ MS_LOG(ERROR) << "Do compute transpose offline info failed."; 1974+ return ret; 1975+ } 1976+ thread_num_ = 1; 1977+ return RET_OK; 1978+} 1979+ 1980+int TransposeFp16Coder::DoCode(CoderContext *const context) { 1981+ Collect(context, 1982+ { 1983+ "nnacl/transpose.h", 1984+ "nnacl/errorcode.h", 1985+ "nnacl/fp16/transpose_fp16.h", 1986+ }, 1987+ { 1988+ "transpose_fp16.c", 1989+ }); 1990+ 1991+ NNaclFp32Serializer code; 1992+ if (input_tensor_->data() != output_tensor_->data()) { 1993+ code.CodeFunction("memcpy", output_tensor_, input_tensor_, input_tensor_->Size()); 1994+ context->AppendCode(code.str()); 1995+ } 1996+ 1997+ auto out_shape = output_tensor_->shape(); 1998+ dims_ = static_cast<int>(out_shape.size()); 1999+ code.CodeArray("output_shape", out_shape.data(), dims_, true); 2000+ code.CodeStruct("trans_param", *param_); 2001+ code.CodeFunction("DoTransposeFp16", input_tensor_, output_tensor_, "output_shape", "&trans_param"); 2002+ context->AppendCode(code.str()); 2003+ return RET_OK; 2004+} 2005+ 2006+REG_OPERATOR_CODER(kARM32, kNumberTypeFloat16, PrimitiveType_Transpose, CPUOpCoderCreator<TransposeFp16Coder>) 2007+REG_OPERATOR_CODER(kARM64, kNumberTypeFloat16, PrimitiveType_Transpose, CPUOpCoderCreator<TransposeFp16Coder>) 2008+} // namespace mindspore::lite::micro::nnacl 2009diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/transpose_fp16_coder.h b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/transpose_fp16_coder.h 2010new file mode 100644 2011index 00000000..240c470e 2012--- /dev/null 2013+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/transpose_fp16_coder.h 2014@@ -0,0 +1,43 @@ 
2015+/** 2016+ * Copyright 2023 Huawei Technologies Co., Ltd 2017+ * 2018+ * Licensed under the Apache License, Version 2.0 (the "License"); 2019+ * you may not use this file except in compliance with the License. 2020+ * You may obtain a copy of the License at 2021+ * 2022+ * http://www.apache.org/licenses/LICENSE-2.0 2023+ * 2024+ * Unless required by applicable law or agreed to in writing, software 2025+ * distributed under the License is distributed on an "AS IS" BASIS, 2026+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 2027+ * See the License for the specific language governing permissions and 2028+ * limitations under the License. 2029+ */ 2030+ 2031+#ifndef MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_TRANSPOSE_FP16_CODER_H_ 2032+#define MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_TRANSPOSE_FP16_CODER_H_ 2033+#include <vector> 2034+#include <string> 2035+#include "coder/opcoders/nnacl/fp32/transpose_fp32_coder.h" 2036+#include "nnacl/transpose.h" 2037+namespace mindspore::lite::micro::nnacl { 2038+class TransposeFp16Coder final : public TransposeFp32Coder { 2039+ public: 2040+ TransposeFp16Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors, 2041+ const LiteGraph::Node *node, size_t node_index, Target target) 2042+ : TransposeFp32Coder(in_tensors, out_tensors, node, node_index, target) {} 2043+ 2044+ ~TransposeFp16Coder() override = default; 2045+ 2046+ int Prepare(CoderContext *const context) override; 2047+ 2048+ int Resize() override; 2049+ 2050+ int DoCode(CoderContext *const context) override; 2051+ 2052+ private: 2053+ int ResetStatus(); 2054+ int ComputeOfflineInfo(); 2055+}; 2056+} // namespace mindspore::lite::micro::nnacl 2057+#endif // MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_TRANSPOSE_FP16_CODER_H_ 2058diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/activation_fp32_coder.cc 
b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/activation_fp32_coder.cc 2059index 35fc1819..edc442e9 100644 2060--- a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/activation_fp32_coder.cc 2061+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/activation_fp32_coder.cc 2062@@ -14,9 +14,7 @@ 2063 * limitations under the License. 2064 */ 2065 #include "coder/opcoders/nnacl/fp32/activation_fp32_coder.h" 2066-#include <string> 2067 #include "nnacl/fp32/activation_fp32.h" 2068-#include "nnacl/op_base.h" 2069 #include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h" 2070 #include "coder/opcoders/file_collector.h" 2071 #include "coder/opcoders/parallel.h" 2072@@ -65,9 +63,15 @@ int ActivationFP32Coder::DoCode(CoderContext *const context) { 2073 case schema::ActivationType_HSWISH: 2074 code.CodeFunction("HSwish", input_tensor_, count, output_tensor_); 2075 break; 2076+ case schema::ActivationType_SWISH: 2077+ code.CodeFunction("Swish", input_tensor_, count, output_tensor_); 2078+ break; 2079 case schema::ActivationType_HSIGMOID: 2080 code.CodeFunction("HSigmoid", input_tensor_, count, output_tensor_); 2081 break; 2082+ case schema::ActivationType_ELU: 2083+ code.CodeFunction("Elu", input_tensor_, count, output_tensor_, activation_parameter->alpha_); 2084+ break; 2085 default: 2086 MS_LOG(ERROR) << "Activation type error"; 2087 return RET_ERROR; 2088diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/activation_fp32_coder.h b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/activation_fp32_coder.h 2089index 67be7e6d..f688d4bd 100644 2090--- a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/activation_fp32_coder.h 2091+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/activation_fp32_coder.h 2092@@ -21,7 +21,7 @@ 2093 #include "coder/opcoders/op_coder.h" 2094 2095 namespace mindspore::lite::micro::nnacl { 2096-class 
ActivationFP32Coder final : public OperatorCoder { 2097+class ActivationFP32Coder : public OperatorCoder { 2098 public: 2099 ActivationFP32Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors, 2100 const LiteGraph::Node *node, size_t node_index, Target target) 2101diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/arithmetic_fp32_coder.cc b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/arithmetic_fp32_coder.cc 2102index d7cc8726..49e53cf9 100644 2103--- a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/arithmetic_fp32_coder.cc 2104+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/arithmetic_fp32_coder.cc 2105@@ -27,8 +27,8 @@ std::string wrap_uint8(const std::string &a) { return "(uint8_t *)(" + a + ")"; 2106 std::string wrap_offset(const std::string &a, int offset) { return "(" + a + "+" + std::to_string(offset) + ")"; } 2107 } // namespace 2108 2109-void ArithmeticFP32Coder::InitRunFunction(int primitive_type) { 2110- ARITHMETIC_FUNC_INFO_FP32 fun_table[] = { 2111+void ArithmeticFP32Coder::InitFunTable() { 2112+ fun_table_ = { 2113 {PrimitiveType_MulFusion, schema::ActivationType_RELU, "ElementMulRelu", "ElementMulReluInt", "", 2114 "ElementOptMulRelu", "ElementOptMulReluInt"}, 2115 {PrimitiveType_MulFusion, schema::ActivationType_RELU6, "ElementMulRelu6", "ElementMulRelu6Int", "", 2116@@ -63,21 +63,23 @@ void ArithmeticFP32Coder::InitRunFunction(int primitive_type) { 2117 "ElementOptModInt"}, 2118 {PrimitiveType_SquaredDifference, schema::ActivationType_NO_ACTIVATION, "ElementSquaredDifference", "", "", "", 2119 ""}}; 2120+} 2121 2122- size_t length = sizeof(fun_table) / sizeof(ARITHMETIC_FUNC_INFO_FP32); 2123- for (size_t i = 0; i < length; i++) { 2124- if (fun_table[i].primitive_type_ == primitive_type && 2125- fun_table[i].activation_type_ == arithmetic_parameter_->activation_type_) { 2126- arithmetic_run_ = fun_table[i].func_; 2127- 
arithmetic_run_int_ = fun_table[i].int_func_; 2128- arithmetic_run_bool_ = fun_table[i].bool_func_; 2129- arithmetic_opt_run_ = fun_table[i].opt_func_; 2130- arithmetic_opt_run_int_ = fun_table[i].opt_int_func_; 2131+void ArithmeticFP32Coder::InitRunFunction(int primitive_type) { 2132+ InitFunTable(); 2133+ for (size_t i = 0; i < fun_table_.size(); i++) { 2134+ if (fun_table_[i].primitive_type_ == primitive_type && 2135+ fun_table_[i].activation_type_ == arithmetic_parameter_->activation_type_) { 2136+ arithmetic_run_ = fun_table_[i].func_; 2137+ arithmetic_run_int_ = fun_table_[i].int_func_; 2138+ arithmetic_run_bool_ = fun_table_[i].bool_func_; 2139+ arithmetic_opt_run_ = fun_table_[i].opt_func_; 2140+ arithmetic_opt_run_int_ = fun_table_[i].opt_int_func_; 2141 } 2142 } 2143 TypeId input_type_id = input_tensor_->data_type(); 2144 data_type_len_ = lite::DataTypeSize(input_tensor_->data_type()); 2145- if (input_type_id == kNumberTypeFloat32 || input_type_id == kNumberTypeFloat) { 2146+ if (input_type_id == kNumberTypeFloat32 || input_type_id == kNumberTypeFloat || input_type_id == kNumberTypeFloat16) { 2147 arithmetic_func_type_ = kArithmeticFuncFloat; 2148 } else if (input_type_id == kNumberTypeBool) { 2149 arithmetic_func_type_ = kArithmeticFuncBool; 2150@@ -122,7 +124,8 @@ int ArithmeticFP32Coder::CheckDataType() { 2151 } 2152 2153 void ArithmeticFP32Coder::ChooseArithmeticFunc(bool is_opt) { 2154- if (input_tensor_->data_type() == kNumberTypeFloat32) { 2155+ if (input_tensor_->data_type() == kNumberTypeFloat32 || input_tensor_->data_type() == kNumberTypeFloat || 2156+ input_tensor_->data_type() == kNumberTypeFloat16) { 2157 if (is_opt) { 2158 arithmetic_func_str_ = wrap_void(arithmetic_opt_run_); 2159 } else { 2160@@ -204,13 +207,8 @@ int ArithmeticFP32Coder::ConstTensorBroadCast(CoderContext *const context) { 2161 } 2162 FreeConstTileBuff(); 2163 NNaclFp32Serializer init_code; 2164- Collect(context, 2165- { 2166- "wrapper/fp32/arithmetic_fp32_wrapper.h", 2167- 
}, 2168- { 2169- "arithmetic_fp32_wrapper.c", 2170- }); 2171+ Collect(context, {"wrapper/fp32/arithmetic_fp32_wrapper.h", "nnacl/fp32/arithmetic_fp32.h"}, 2172+ {"arithmetic_fp32_wrapper.c", "arithmetic_fp32.c"}); 2173 if (input_tensor_->IsConst() && 2174 arithmetic_parameter_->in_elements_num0_ != arithmetic_parameter_->out_elements_num_) { 2175 input0_ptr_ = reinterpret_cast<float *>( 2176diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/arithmetic_fp32_coder.h b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/arithmetic_fp32_coder.h 2177index e1ec51fe..169ed457 100644 2178--- a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/arithmetic_fp32_coder.h 2179+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/arithmetic_fp32_coder.h 2180@@ -66,7 +66,7 @@ using mindspore::schema::PrimitiveType_Minimum; 2181 2182 using mindspore::schema::PrimitiveType_Mod; 2183 2184-class ArithmeticFP32Coder final : public OperatorCoder { 2185+class ArithmeticFP32Coder : public OperatorCoder { 2186 typedef struct { 2187 int primitive_type_; 2188 int activation_type_; 2189@@ -88,14 +88,7 @@ class ArithmeticFP32Coder final : public OperatorCoder { 2190 2191 int DoCode(CoderContext *const context) override; 2192 2193- private: 2194- int ReSize(CoderContext *const context); 2195- 2196- int ExecuteCode(const std::string &input0, const std::string &input1, const std::string &output, int size, 2197- bool is_opt, CoderContext *const context, NNaclFp32Serializer *const code); 2198- 2199- void InitRunFunction(int primitive_type); 2200- 2201+ protected: 2202 int CheckDataType(); 2203 2204 void ChooseArithmeticFunc(bool is_opt); 2205@@ -108,6 +101,16 @@ class ArithmeticFP32Coder final : public OperatorCoder { 2206 2207 void FreeConstTileBuff(); 2208 2209+ virtual void InitFunTable(); 2210+ 2211+ virtual int ReSize(CoderContext *const context); 2212+ 2213+ virtual void InitRunFunction(int primitive_type); 2214+ 2215+ private: 
2216+ int ExecuteCode(const std::string &input0, const std::string &input1, const std::string &output, int size, 2217+ bool is_opt, CoderContext *const context, NNaclFp32Serializer *const code); 2218+ 2219 int ConstTensorBroadCast(CoderContext *const context); 2220 2221 void ComputeInOutStrides(); 2222@@ -121,7 +124,9 @@ class ArithmeticFP32Coder final : public OperatorCoder { 2223 2224 void CollectFilesForFunc(CoderContext *const context); 2225 2226- private: 2227+ protected: 2228+ std::vector<ARITHMETIC_FUNC_INFO_FP32> fun_table_; 2229+ 2230 int break_pos_{0}; 2231 2232 int outside_{0}; 2233@@ -148,10 +153,6 @@ class ArithmeticFP32Coder final : public OperatorCoder { 2234 2235 Tensor *filter_tensor_{nullptr}; 2236 2237- ArithmeticFuncType arithmetic_func_type_{kArithmeticFuncUnknow}; 2238- 2239- ArithmeticWrapperInfo arithmetic_wrapper_info_{}; 2240- 2241 std::string input0_ptr_str_; 2242 2243 std::string input1_ptr_str_; 2244@@ -169,6 +170,11 @@ class ArithmeticFP32Coder final : public OperatorCoder { 2245 std::string arithmetic_run_bool_; 2246 2247 std::string arithmetic_func_str_; 2248+ 2249+ private: 2250+ ArithmeticFuncType arithmetic_func_type_{kArithmeticFuncUnknow}; 2251+ 2252+ ArithmeticWrapperInfo arithmetic_wrapper_info_{}; 2253 }; 2254 } // namespace mindspore::lite::micro::nnacl 2255 #endif // MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP32_ARITHMETIC_FP32_CODER_H_ 2256diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/concat_fp32_coder.h b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/concat_fp32_coder.h 2257index 67607e13..6f3f5c71 100644 2258--- a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/concat_fp32_coder.h 2259+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/concat_fp32_coder.h 2260@@ -22,7 +22,7 @@ 2261 #include "nnacl/concat_parameter.h" 2262 2263 namespace mindspore::lite::micro::nnacl { 2264-class ConcatFP32Coder final : public OperatorCoder { 
2265+class ConcatFP32Coder : public OperatorCoder { 2266 public: 2267 ConcatFP32Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors, 2268 const LiteGraph::Node *node, size_t node_index, Target target) 2269diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/exp_fp32_coder.cc b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/exp_fp32_coder.cc 2270index 65334f4b..e9bbca19 100644 2271--- a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/exp_fp32_coder.cc 2272+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/exp_fp32_coder.cc 2273@@ -49,7 +49,7 @@ int ExpFP32Coder::DoCode(CoderContext *ctx) { 2274 }); 2275 nnacl::NNaclFp32Serializer code; 2276 code.CodeStruct("exp_parameter", *exp_parameter_); 2277- code.CodeFunction("exp", input_tensor_, output_tensor_, "(ExpParameter *)&exp_parameter", kDefaultTaskId); 2278+ code.CodeFunction("ExpFusionFp32", input_tensor_, output_tensor_, "(ExpParameter *)&exp_parameter", kDefaultTaskId); 2279 ctx->AppendCode(code.str()); 2280 return RET_OK; 2281 } 2282diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/gather_fp32_coder.cc b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/gather_fp32_coder.cc 2283index 087d4dde..3c31479c 100644 2284--- a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/gather_fp32_coder.cc 2285+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/gather_fp32_coder.cc 2286@@ -94,4 +94,6 @@ int GatherFP32Coder::DoCode(CoderContext *context) { 2287 } 2288 2289 REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_Gather, CPUOpCoderCreator<GatherFP32Coder>) 2290+REG_OPERATOR_CODER(kAllTargets, kNumberTypeInt32, PrimitiveType_Gather, CPUOpCoderCreator<GatherFP32Coder>) 2291+REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat16, PrimitiveType_Gather, CPUOpCoderCreator<GatherFP32Coder>) 2292 } // namespace mindspore::lite::micro::nnacl 
2293diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/layernorm_fp32_coder.cc b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/layernorm_fp32_coder.cc 2294new file mode 100644 2295index 00000000..992a58e4 2296--- /dev/null 2297+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/layernorm_fp32_coder.cc 2298@@ -0,0 +1,81 @@ 2299+/** 2300+ * Copyright 2023 Huawei Technologies Co., Ltd 2301+ * 2302+ * Licensed under the Apache License, Version 2.0 (the "License"); 2303+ * you may not use this file except in compliance with the License. 2304+ * You may obtain a copy of the License at 2305+ * 2306+ * http://www.apache.org/licenses/LICENSE-2.0 2307+ * 2308+ * Unless required by applicable law or agreed to in writing, software 2309+ * distributed under the License is distributed on an "AS IS" BASIS, 2310+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 2311+ * See the License for the specific language governing permissions and 2312+ * limitations under the License. 2313+ */ 2314+#include "coder/opcoders/nnacl/fp32/layernorm_fp32_coder.h" 2315+#include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h" 2316+#include "coder/opcoders/file_collector.h" 2317+#include "coder/opcoders/parallel.h" 2318+ 2319+using mindspore::schema::PrimitiveType_LayerNormFusion; 2320+ 2321+namespace mindspore::lite::micro::nnacl { 2322+namespace { 2323+constexpr size_t kOutputNum = 3; 2324+} 2325+int LayerNormFP32Coder::Prepare(CoderContext *const context) { 2326+ param_ = reinterpret_cast<LayerNormParameter *>(parameter_); 2327+ param_->op_parameter_.thread_num_ = 1; 2328+ auto shape = input_tensor_->shape(); 2329+ param_->begin_norm_axis_ = param_->begin_norm_axis_ >= 0 ? param_->begin_norm_axis_ 2330+ : param_->begin_norm_axis_ + static_cast<int>(shape.size()); 2331+ param_->begin_params_axis_ = param_->begin_params_axis_ >= 0 2332+ ? 
param_->begin_params_axis_ 2333+ : param_->begin_params_axis_ + static_cast<int>(shape.size()); 2334+ MS_CHECK_LT(param_->begin_norm_axis_, static_cast<int>(shape.size()), RET_ERROR); 2335+ MS_CHECK_LT(param_->begin_params_axis_, static_cast<int>(shape.size()), RET_ERROR); 2336+ param_->norm_outer_size_ = 1; 2337+ for (int i = 0; i < param_->begin_norm_axis_; ++i) { 2338+ MS_CHECK_FALSE_MSG(INT_MUL_OVERFLOW(param_->norm_outer_size_, shape.at(i)), RET_ERROR, "mul overflow."); 2339+ param_->norm_outer_size_ *= shape.at(i); 2340+ } 2341+ param_->norm_inner_size_ = 1; 2342+ for (size_t i = param_->begin_norm_axis_; i < shape.size(); ++i) { 2343+ MS_CHECK_FALSE_MSG(INT_MUL_OVERFLOW(param_->norm_inner_size_, shape.at(i)), RET_ERROR, "mul overflow."); 2344+ param_->norm_inner_size_ *= shape.at(i); 2345+ } 2346+ param_->params_outer_size_ = 1; 2347+ for (int i = 0; i < param_->begin_params_axis_; ++i) { 2348+ MS_CHECK_FALSE_MSG(INT_MUL_OVERFLOW(param_->params_outer_size_, shape.at(i)), RET_ERROR, "mul overflow."); 2349+ param_->params_outer_size_ *= shape.at(i); 2350+ } 2351+ param_->params_inner_size_ = 1; 2352+ for (size_t i = param_->begin_params_axis_; i < shape.size(); ++i) { 2353+ MS_CHECK_FALSE_MSG(INT_MUL_OVERFLOW(param_->params_inner_size_, shape.at(i)), RET_ERROR, "mul overflow."); 2354+ param_->params_inner_size_ *= shape.at(i); 2355+ } 2356+ return RET_OK; 2357+} 2358+ 2359+int LayerNormFP32Coder::DoCode(CoderContext *const context) { 2360+ NNaclFp32Serializer code; 2361+ code.CodeStruct("layer_norm_parm", *param_); 2362+ Collect(context, {"nnacl/fp32/layer_norm_fp32.h"}, {"layer_norm_fp32.c"}); 2363+ if (output_tensors_.size() == kOutputNum) { 2364+ code.CodeFunction("LayerNorm", input_tensor_, input_tensors_.at(SECOND_INPUT), input_tensors_.at(THIRD_INPUT), 2365+ output_tensor_, output_tensors_.at(SECOND_INPUT), output_tensors_.at(THIRD_INPUT), 2366+ "&layer_norm_parm", 0); 2367+ } else if (output_tensors_.size() == 1) { 2368+ code.CodeFunction("LayerNorm", 
input_tensor_, input_tensors_.at(SECOND_INPUT), input_tensors_.at(THIRD_INPUT), 2369+ output_tensor_, "NULL", "NULL", "&layer_norm_parm", 0); 2370+ } else { 2371+ return RET_ERROR; 2372+ } 2373+ context->AppendCode(code.str()); 2374+ return RET_OK; 2375+} 2376+ 2377+REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_LayerNormFusion, 2378+ CPUOpCoderCreator<LayerNormFP32Coder>) 2379+} // namespace mindspore::lite::micro::nnacl 2380diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/layernorm_fp32_coder.h b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/layernorm_fp32_coder.h 2381new file mode 100644 2382index 00000000..a14cff57 2383--- /dev/null 2384+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/layernorm_fp32_coder.h 2385@@ -0,0 +1,40 @@ 2386+/** 2387+ * Copyright 2022 Huawei Technologies Co., Ltd 2388+ * 2389+ * Licensed under the Apache License, Version 2.0 (the "License"); 2390+ * you may not use this file except in compliance with the License. 2391+ * You may obtain a copy of the License at 2392+ * 2393+ * http://www.apache.org/licenses/LICENSE-2.0 2394+ * 2395+ * Unless required by applicable law or agreed to in writing, software 2396+ * distributed under the License is distributed on an "AS IS" BASIS, 2397+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 2398+ * See the License for the specific language governing permissions and 2399+ * limitations under the License. 
2400+ */ 2401+ 2402+#ifndef MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP32_LAYERNORM_FP32_CODER_H_ 2403+#define MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP32_LAYERNORM_FP32_CODER_H_ 2404+ 2405+#include <vector> 2406+#include "coder/opcoders/op_coder.h" 2407+#include "nnacl/layer_norm_parameter.h" 2408+ 2409+namespace mindspore::lite::micro::nnacl { 2410+class LayerNormFP32Coder : public OperatorCoder { 2411+ public: 2412+ LayerNormFP32Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors, 2413+ const LiteGraph::Node *node, size_t node_index, Target target) 2414+ : OperatorCoder(in_tensors, out_tensors, node, node_index, target) {} 2415+ ~LayerNormFP32Coder() override = default; 2416+ 2417+ int Prepare(CoderContext *const context) override; 2418+ 2419+ int DoCode(CoderContext *const context) override; 2420+ 2421+ protected: 2422+ LayerNormParameter *param_; 2423+}; 2424+} // namespace mindspore::lite::micro::nnacl 2425+#endif // MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP32_LAYERNORM_FP32_CODER_H_ 2426diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/lstm_fp32_coder.cc b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/lstm_fp32_coder.cc 2427index 3bc8ea4b..561f6259 100644 2428--- a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/lstm_fp32_coder.cc 2429+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/lstm_fp32_coder.cc 2430@@ -75,13 +75,6 @@ int LstmFP32Coder::InitStateWeightBias(CoderContext *const context) { 2431 w_buf_size += weight_h_size; 2432 init_code.CodeFunction("PackLstmWeight", weight_h_ptr_, weight_h, weight_batch_, lstm_param_->hidden_size_, 2433 lstm_param_->hidden_size_, lstm_param_->state_col_align_, "NULL"); 2434- } else { 2435- size_t weight_h_size = weight_h->Size(); 2436- weight_h_ptr_ = 2437- reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, weight_h->Size(), 
kOfflinePackWeight)); 2438- MS_CHECK_PTR(weight_h_ptr_); 2439- MS_CHECK_RET_CODE(memcpy_s(weight_h_ptr_, weight_h_size, weight_h->data(), weight_h_size), 2440- "copy weight h data failed"); 2441 } 2442 2443 state_bias_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, kOnlineSize, kOnlinePackWeight)); 2444@@ -214,8 +207,15 @@ int LstmFP32Coder::DoCode(CoderContext *context) { 2445 code.CodeArray("buffer", buffers_addr.data(), buffers_addr.size(), false); 2446 code.CodeFunction("memcpy", output_hidden_state, hidden_state, hidden_state->Size()); 2447 code.CodeFunction("memcpy", output_cell_state, cell_state, cell_state->Size()); 2448- code.CodeFunction("Lstm", output_tensor_, input_tensor_, weight_i_ptr_, weight_h_ptr_, input_bias_, state_bias_, 2449- output_hidden_state, output_cell_state, "buffer", "&lstm_param"); 2450+ if (weight_h_ptr_ != nullptr) { 2451+ code.CodeFunction("Lstm", output_tensor_, input_tensor_, weight_i_ptr_, weight_h_ptr_, input_bias_, state_bias_, 2452+ output_hidden_state, output_cell_state, "buffer", "&lstm_param"); 2453+ } else { 2454+ auto *weight_h_tensor = input_tensors().at(kInputSize1); 2455+ auto weight_h = allocator_->GetRuntimeAddr(weight_h_tensor, weight_h_tensor->IsConst()); 2456+ code.CodeFunction("Lstm", output_tensor_, input_tensor_, weight_i_ptr_, weight_h, input_bias_, state_bias_, 2457+ output_hidden_state, output_cell_state, "buffer", "&lstm_param"); 2458+ } 2459 context->AppendCode(code.str()); 2460 return RET_OK; 2461 } 2462diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/matmul_fp32_base_coder.cc b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/matmul_fp32_base_coder.cc 2463index 26707a3e..790a142e 100644 2464--- a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/matmul_fp32_base_coder.cc 2465+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/matmul_fp32_base_coder.cc 2466@@ -55,7 +55,8 @@ int MatMulFP32BaseCoder::InitBiasData() { 
2467 is_bias_broadcast_ = true; 2468 } 2469 ori_bias_pack_ptr_size_ = bias_tensor_->ElementsNum() * sizeof(float); 2470- bias_ptr_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, kOnlineSize, kOnlinePackWeight)); 2471+ bias_ptr_ = allocator_->Malloc(kNumberTypeFloat32, kOnlineSize, kOnlinePackWeight, 2472+ bias_tensor_->tensor_name() + "_online_pack"); 2473 MS_CHECK_PTR(bias_ptr_); 2474 } 2475 return RET_OK; 2476@@ -84,7 +85,14 @@ int MatMulFP32BaseCoder::InitBufferA() { 2477 } 2478 a_pack_ptr_size_ = static_cast<size_t>(params_->batch * params_->row_align_ * params_->deep_ * sizeof(float)); 2479 if (params_->a_const_) { 2480- a_pack_ptr_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, kOnlineSize, kOnlinePackWeight)); 2481+ a_pack_ptr_ = reinterpret_cast<float *>(allocator_->GetSharedWeightAddr(input_tensors_.at(0))); 2482+ if (a_pack_ptr_ == nullptr) { 2483+ a_pack_ptr_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, kOnlineSize, kOnlinePackWeight, 2484+ input_tensors_.at(0)->tensor_name() + "_online_pack")); 2485+ allocator_->MarkSharedWeight(input_tensors_.at(0), a_pack_ptr_); 2486+ } else { 2487+ a_packed_ = true; 2488+ } 2489 } else { 2490 a_pack_ptr_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, a_pack_ptr_size_, kWorkspace)); 2491 } 2492@@ -96,23 +104,30 @@ int MatMulFP32BaseCoder::InitBufferB() { 2493 if (b_pack_ptr_ != nullptr) { 2494 return RET_OK; 2495 } 2496- b_pack_ptr_size_ = static_cast<size_t>(params_->batch * params_->col_align_ * params_->deep_ * sizeof(float)); 2497+ b_pack_ptr_size_ = static_cast<size_t>(params_->batch * params_->col_align_ * params_->deep_ * data_type_size_); 2498 if (params_->b_const_) { 2499- b_pack_ptr_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, kOnlineSize, kOnlinePackWeight)); 2500+ b_pack_ptr_ = reinterpret_cast<float *>(allocator_->GetSharedWeightAddr(input_tensors_.at(1))); 2501+ if (b_pack_ptr_ == nullptr) { 2502+ 
b_pack_ptr_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeUInt8, b_pack_ptr_size_, kOnlinePackWeight, 2503+ input_tensors_.at(1)->tensor_name() + "_online_pack")); 2504+ allocator_->MarkSharedWeight(input_tensors_.at(1), b_pack_ptr_); 2505+ } else { 2506+ b_packed_ = true; 2507+ } 2508 } else { 2509- b_pack_ptr_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, b_pack_ptr_size_, kWorkspace)); 2510+ b_pack_ptr_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeUInt8, b_pack_ptr_size_, kWorkspace)); 2511 } 2512 MS_CHECK_PTR(b_pack_ptr_); 2513 return RET_OK; 2514 } 2515 2516 int MatMulFP32BaseCoder::InitMatrixA(const float *src_ptr) { 2517- ::InitMatrixA(src_ptr, a_pack_ptr_, params_, vec_matmul_); 2518+ ::InitMatrixA(src_ptr, static_cast<float *>(a_pack_ptr_), params_, vec_matmul_); 2519 return RET_OK; 2520 } 2521 2522 int MatMulFP32BaseCoder::InitMatrixB(const float *src_ptr) { 2523- ::InitMatrixB(src_ptr, b_pack_ptr_, params_, vec_matmul_); 2524+ ::InitMatrixB(src_ptr, static_cast<float *>(b_pack_ptr_), params_, vec_matmul_); 2525 return RET_OK; 2526 } 2527 2528@@ -179,12 +194,11 @@ int MatMulFP32BaseCoder::DoCode(CoderContext *const context) { 2529 NNaclFp32Serializer code, init_code; 2530 size_t w_buf_size = 0; 2531 std::string param_name = "mat_mul_parameter"; 2532- 2533+ std::string bias_ptr_str = "((float *)(" + allocator_->GetRuntimeAddr(bias_ptr_) + "))"; 2534 code.CodeStruct(param_name, *params_); 2535 if (support_parallel_) { 2536 code << " " << param_name << ".op_parameter_.thread_num_ = 1;\n"; 2537 } 2538- init_code.CodeStruct("mat_mul_parameter", *params_); 2539 // do bias packing to init 2540 if (input_tensors_.size() == DIMENSION_3D) { 2541 init_code.CodeBufferOffsetExpression(bias_ptr_, context->weight_name(), context->weight_offset_name(), 2542@@ -193,7 +207,6 @@ int MatMulFP32BaseCoder::DoCode(CoderContext *const context) { 2543 int max_bias_data = params_->col_align_; 2544 if (is_bias_broadcast_) { 2545 float 
broad_cast_data = (reinterpret_cast<float *>(bias_tensor_->data()))[0]; 2546- std::string bias_ptr_str = "((float *)(" + allocator_->GetRuntimeAddr(bias_ptr_) + "))"; 2547 init_code << "\t for (int i = 0; i < " << max_bias_data << "; ++i) {\n"; 2548 init_code << "\t\t " << bias_ptr_str << "[i] = " << broad_cast_data << ";\n"; 2549 init_code << " }\n"; 2550@@ -209,44 +222,43 @@ int MatMulFP32BaseCoder::DoCode(CoderContext *const context) { 2551 std::string a_pack_str = allocator_->GetRuntimeAddr(a_pack_ptr_); 2552 std::string b_pack_str = allocator_->GetRuntimeAddr(b_pack_ptr_); 2553 // do const value packing to init 2554- if (!params_->a_const_) { 2555- code.CodeFunction("InitMatrixA", input_tensor_, a_pack_ptr_, "&mat_mul_parameter", vec_matmul_); 2556- if (!params_->b_const_) { 2557- init_code.CodeMallocExpression(b_pack_ptr_, b_pack_ptr_size_); 2558- init_code.CodeFunction("memset", b_pack_ptr_, 0, b_pack_ptr_size_); 2559- } else { 2560- init_code.CodeBufferOffsetExpression(b_pack_ptr_, context->weight_name(), context->weight_offset_name(), 2561- context->weight_size_name(), b_pack_ptr_size_); 2562- w_buf_size += b_pack_ptr_size_; 2563- } 2564- std::string b_src_str = b_str; 2565- if (de_quant_flag_) { 2566- // reuse to b_pack_str 2567- b_src_str = Dequant::GetInstance()->de_quant_buffer_str(); 2568- std::string de_quant_function = Dequant::GetInstance()->GetMicroDeQuantFunction(filter_tensor_, b_str); 2569- init_code << de_quant_function; 2570- } 2571- // b_pack_str has been memset, no need to memset 2572- init_code.CodeFunction("InitMatrixB", b_src_str, b_pack_ptr_, "&mat_mul_parameter", vec_matmul_); 2573+ if ((params_->a_const_ && !a_packed_) || (params_->b_const_ && !b_packed_)) { 2574+ init_code.CodeStruct("mat_mul_parameter", *params_); 2575 } 2576- if (!params_->b_const_) { 2577- if (!params_->a_const_) { 2578- init_code.CodeMallocExpression(a_pack_str, a_pack_ptr_size_); 2579- init_code.CodeFunction("memset", a_pack_ptr_, 0, a_pack_ptr_size_); 2580- } 
else { 2581+ if (params_->a_const_) { 2582+ if (!a_packed_) { 2583 init_code.CodeBufferOffsetExpression(a_pack_ptr_, context->weight_name(), context->weight_offset_name(), 2584 context->weight_size_name(), a_pack_ptr_size_); 2585 w_buf_size += a_pack_ptr_size_; 2586+ std::string a_src_str = a_str; 2587+ if (de_quant_flag_) { 2588+ // reuse to a_pack_str 2589+ a_src_str = Dequant::GetInstance()->de_quant_buffer_str(); 2590+ std::string de_quant_function = Dequant::GetInstance()->GetMicroDeQuantFunction(input_tensor_, a_str); 2591+ init_code << de_quant_function; 2592+ } 2593+ // a_pack_str has been memset, no need to memset 2594+ init_code.CodeFunction("InitMatrixA", a_src_str, a_pack_ptr_, "&mat_mul_parameter", vec_matmul_); 2595 } 2596- std::string a_src_str = a_str; 2597- if (de_quant_flag_) { 2598- // reuse to a_pack_str 2599- a_src_str = Dequant::GetInstance()->de_quant_buffer_str(); 2600- std::string de_quant_function = Dequant::GetInstance()->GetMicroDeQuantFunction(input_tensor_, a_str); 2601- init_code << de_quant_function; 2602+ } else { 2603+ code.CodeFunction("InitMatrixA", input_tensor_, a_pack_ptr_, "&mat_mul_parameter", vec_matmul_); 2604+ } 2605+ if (params_->b_const_) { 2606+ if (!b_packed_) { 2607+ init_code.CodeBufferOffsetExpression(b_pack_ptr_, context->weight_name(), context->weight_offset_name(), 2608+ context->weight_size_name(), b_pack_ptr_size_); 2609+ w_buf_size += b_pack_ptr_size_; 2610+ std::string b_src_str = b_str; 2611+ if (de_quant_flag_) { 2612+ // reuse to b_pack_str 2613+ b_src_str = Dequant::GetInstance()->de_quant_buffer_str(); 2614+ std::string de_quant_function = Dequant::GetInstance()->GetMicroDeQuantFunction(filter_tensor_, b_str); 2615+ init_code << de_quant_function; 2616+ } 2617+ // b_pack_str has been memset, no need to memset 2618+ init_code.CodeFunction("InitMatrixB", b_src_str, b_pack_ptr_, "&mat_mul_parameter", vec_matmul_); 2619 } 2620- // a_pack_str has been memset, no need to memset 2621- 
init_code.CodeFunction("InitMatrixA", a_src_str, a_pack_ptr_, "&mat_mul_parameter", vec_matmul_); 2622+ } else { 2623 code.CodeFunction("InitMatrixB", filter_tensor_, b_pack_ptr_, "&mat_mul_parameter", vec_matmul_); 2624 } 2625 int current_stride_oc = thread_stride_ * col_tile_; 2626@@ -257,18 +269,18 @@ int MatMulFP32BaseCoder::DoCode(CoderContext *const context) { 2627 if (vec_matmul_) { 2628 code << " const float *batch_a_ptr = " << a_pack_str << " + i * " << params_->deep_ << ";\n"; 2629 code << " const float *batch_b_ptr = " << b_pack_str << " + i * " << params_->deep_ * params_->col_ << ";\n"; 2630- code << " float *batch_c_ptr = " << c_str << " + i * " << params_->row_ * params_->col_ << ";\n"; 2631+ code << " float *batch_c_ptr = " << c_str << " + i * " << params_->row_ * params_->col_ << ";\n "; 2632 2633- code.CodeFunction("MatVecMulFp32", "batch_a_ptr", "batch_b_ptr", "batch_c_ptr", bias_ptr_, params_->act_type_, 2634+ code.CodeFunction("MatVecMulFp32", "batch_a_ptr", "batch_b_ptr", "batch_c_ptr", bias_ptr_str, params_->act_type_, 2635 params_->deep_, cur_oc); 2636 } else { 2637 code << " const float *batch_a_ptr = " << a_pack_str << " + i * " << params_->row_align_ * params_->deep_ 2638 << ";\n"; 2639 code << " const float *batch_b_ptr = " << b_pack_str << " + i * " << params_->deep_ * params_->col_align_ 2640 << ";\n"; 2641- code << " float *batch_c_ptr = " << c_str << " + i * " << params_->row_ * params_->col_ << ";\n"; 2642+ code << " float *batch_c_ptr = " << c_str << " + i * " << params_->row_ * params_->col_ << ";\n "; 2643 2644- code.CodeFunction("MatMulOpt", "batch_a_ptr", "batch_b_ptr", "batch_c_ptr", bias_ptr_, params_->act_type_, 2645+ code.CodeFunction("MatMulOpt", "batch_a_ptr", "batch_b_ptr", "batch_c_ptr", bias_ptr_str, params_->act_type_, 2646 params_->deep_, params_->row_, cur_oc, params_->col_, "OutType_Nhwc"); 2647 } 2648 code << " }\n"; 2649diff --git 
a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/matmul_fp32_base_coder.h b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/matmul_fp32_base_coder.h 2650index 4f35254d..68b2658a 100644 2651--- a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/matmul_fp32_base_coder.h 2652+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/matmul_fp32_base_coder.h 2653@@ -36,30 +36,30 @@ class MatMulFP32BaseCoder : public OperatorCoder { 2654 2655 virtual int ReSize(); 2656 2657+ protected: 2658+ virtual void ResizeParameter(); 2659+ virtual int InitBiasData(); 2660+ virtual int InitBufferA(); 2661+ virtual int InitBufferB(); 2662+ virtual int CollectFilesForTarget(CoderContext *const context); 2663+ virtual int Init(); 2664+ virtual void InitParameter(); 2665+ 2666 private: 2667- void ResizeParameter(); 2668- int InitBiasData(); 2669- int InitBufferA(); 2670- int InitBufferB(); 2671 int InitMatrixA(const float *src_ptr); 2672 int InitMatrixB(const float *src_ptr); 2673- int CollectFilesForTarget(CoderContext *const context); 2674- 2675- protected: 2676- virtual int Init(); 2677- void InitParameter(); 2678 2679 protected: 2680 Tensor *filter_tensor_{nullptr}; 2681 Tensor *bias_tensor_{nullptr}; 2682 MatMulParameter *params_{nullptr}; 2683- float *a_pack_ptr_ = nullptr; 2684- float *b_pack_ptr_ = nullptr; 2685- float *bias_ptr_{nullptr}; 2686+ void *a_pack_ptr_ = nullptr; 2687+ void *b_pack_ptr_ = nullptr; 2688+ void *bias_ptr_{nullptr}; 2689 bool vec_matmul_{false}; 2690 bool de_quant_flag_{false}; 2691- 2692- private: 2693+ bool a_packed_{false}; 2694+ bool b_packed_{false}; 2695 int col_tile_{0}; 2696 int row_tile_{0}; 2697 int thread_stride_{0}; 2698@@ -69,6 +69,7 @@ class MatMulFP32BaseCoder : public OperatorCoder { 2699 size_t a_pack_ptr_size_{0}; 2700 size_t b_pack_ptr_size_{0}; 2701 bool is_bias_broadcast_{false}; 2702+ size_t data_type_size_{C4NUM}; 2703 }; 2704 } // namespace mindspore::lite::micro::nnacl 2705 
#endif // MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP32_MATMUL_FP32_BASE_CODER_H_ 2706diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/pooling_fp32_coder.h b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/pooling_fp32_coder.h 2707index df08dcbe..9f4e0026 100644 2708--- a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/pooling_fp32_coder.h 2709+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/pooling_fp32_coder.h 2710@@ -21,7 +21,7 @@ 2711 #include "coder/opcoders/op_coder.h" 2712 2713 namespace mindspore::lite::micro::nnacl { 2714-class PoolingFP32Coder final : public OperatorCoder { 2715+class PoolingFP32Coder : public OperatorCoder { 2716 public: 2717 PoolingFP32Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors, 2718 const LiteGraph::Node *node, size_t node_index, Target target) 2719diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/reduce_fp32_coder.cc b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/reduce_fp32_coder.cc 2720index 661881af..11e8a3ec 100644 2721--- a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/reduce_fp32_coder.cc 2722+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/reduce_fp32_coder.cc 2723@@ -16,7 +16,6 @@ 2724 2725 #include "coder/opcoders/nnacl/fp32/reduce_fp32_coder.h" 2726 #include <string> 2727-#include "coder/log.h" 2728 #include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h" 2729 #include "coder/opcoders/file_collector.h" 2730 2731@@ -25,14 +24,14 @@ namespace mindspore::lite::micro::nnacl { 2732 int ReduceFP32Coder::Prepare(CoderContext *const context) { 2733 MS_CHECK_RET_CODE(ReduceBaseCoder::Init(), "init failed"); 2734 MS_CHECK_RET_CODE(ReSize(), "resize failed"); 2735- MS_CHECK_RET_CODE(MallocTmpBuffer(), "malloc buffer failed"); 2736+ MS_CHECK_RET_CODE(MallocTmpBuffer(kNumberTypeFloat32), "malloc 
buffer failed"); 2737 return RET_OK; 2738 } 2739 2740-int ReduceFP32Coder::MallocTmpBuffer() { 2741+int ReduceFP32Coder::MallocTmpBuffer(mindspore::TypeId type_id) { 2742 data_buffers_.clear(); 2743 for (auto size : buffer_sizes_) { 2744- auto *buffer = static_cast<float *>(allocator_->Malloc(kNumberTypeFloat, size * sizeof(float), kWorkspace)); 2745+ auto *buffer = static_cast<float *>(allocator_->Malloc(type_id, size * lite::DataTypeSize(type_id), kWorkspace)); 2746 MS_CHECK_PTR(buffer); 2747 data_buffers_.emplace_back(buffer); 2748 } 2749@@ -57,41 +56,42 @@ int ReduceFP32Coder::DoCode(CoderContext *const context) { 2750 "reduce_fp32.c", 2751 }); 2752 2753- NNaclFp32Serializer code; 2754 // call the op function 2755- std::string reduce; 2756- std::string int_reduce; 2757 switch (mode_) { 2758 case static_cast<int>(schema::ReduceMode_ReduceSum): { 2759- reduce = "ReduceSum"; 2760+ reduce_ = "ReduceSum"; 2761 break; 2762 } 2763 case static_cast<int>(schema::ReduceMode_ReduceMean): { 2764- reduce = "ReduceMean"; 2765+ reduce_ = "ReduceMean"; 2766 break; 2767 } 2768 case static_cast<int>(schema::ReduceMode_ReduceMax): { 2769- reduce = "ReduceMax"; 2770+ reduce_ = "ReduceMax"; 2771 break; 2772 } 2773 case static_cast<int>(schema::ReduceMode_ReduceMin): { 2774- reduce = "ReduceMin"; 2775+ reduce_ = "ReduceMin"; 2776 break; 2777 } 2778 case static_cast<int>(schema::ReduceMode_ReduceProd): { 2779- reduce = "ReduceProd"; 2780- int_reduce = "IntReduceProd"; 2781+ reduce_ = "ReduceProd"; 2782+ int_reduce_ = "IntReduceProd"; 2783 break; 2784 } 2785 case static_cast<int>(schema::ReduceMode_ReduceSumSquare): { 2786- reduce = "ReduceSumSquare"; 2787+ reduce_ = "ReduceSumSquare"; 2788 break; 2789 } 2790 default: 2791- MS_LOG(ERROR) << "Reduce unsupported reduce mode: " << mode_; 2792+ MS_LOG(ERROR) << "Reduce unsupported reduce_ mode: " << mode_; 2793 return RET_ERROR; 2794 } 2795- 2796+ GenerateCode(context); 2797+ return RET_OK; 2798+} 2799+void 
ReduceFP32Coder::GenerateCode(CoderContext *const context) { 2800+ NNaclFp32Serializer code; 2801 std::string src_addr = allocator_->GetRuntimeAddr(input_tensor_); 2802 std::string dst_addr; 2803 for (int i = 0; i < num_axes_; ++i) { 2804@@ -103,16 +103,16 @@ int ReduceFP32Coder::DoCode(CoderContext *const context) { 2805 outer_size_ = outer_sizes_.at(i); 2806 inner_size_ = inner_sizes_.at(i); 2807 axis_size_ = axis_sizes_.at(i); 2808- if (data_type_ == ::kNumberTypeFloat32) { 2809- code.CodeFunction(reduce, outer_size_, inner_size_, axis_size_, src_addr, dst_addr, 0, thread_num_); 2810+ if (data_type_ == ::kNumberTypeInt32) { 2811+ code.CodeFunction(int_reduce_, outer_size_, inner_size_, axis_size_, src_addr, dst_addr, 0, thread_num_); 2812 } else { 2813- code.CodeFunction(int_reduce, outer_size_, inner_size_, axis_size_, src_addr, dst_addr, 0, thread_num_); 2814+ code.CodeFunction(reduce_, outer_size_, inner_size_, axis_size_, src_addr, dst_addr, 0, thread_num_); 2815 } 2816 src_addr = dst_addr; 2817 } 2818 context->AppendCode(code.str()); 2819- return RET_OK; 2820 } 2821 2822 REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_ReduceFusion, CPUOpCoderCreator<ReduceFP32Coder>) 2823+REG_OPERATOR_CODER(kAllTargets, kNumberTypeInt32, PrimitiveType_ReduceFusion, CPUOpCoderCreator<ReduceFP32Coder>) 2824 } // namespace mindspore::lite::micro::nnacl 2825diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/reduce_fp32_coder.h b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/reduce_fp32_coder.h 2826index a62f35ec..5b9ccd2b 100644 2827--- a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/reduce_fp32_coder.h 2828+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/reduce_fp32_coder.h 2829@@ -23,7 +23,7 @@ 2830 #include "coder/opcoders/op_coder.h" 2831 2832 namespace mindspore::lite::micro::nnacl { 2833-class ReduceFP32Coder final : public ReduceBaseCoder { 2834+class ReduceFP32Coder : public 
ReduceBaseCoder { 2835 public: 2836 ReduceFP32Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors, 2837 const LiteGraph::Node *node, size_t node_index, Target target) 2838@@ -35,11 +35,17 @@ class ReduceFP32Coder final : public ReduceBaseCoder { 2839 2840 int DoCode(CoderContext *const context) override; 2841 2842- private: 2843- int ReSize() override; 2844- int MallocTmpBuffer(); 2845+ protected: 2846+ void GenerateCode(CoderContext *const context); 2847+ int MallocTmpBuffer(mindspore::TypeId type_id); 2848+ 2849+ std::string reduce_; 2850+ std::string int_reduce_; 2851 TypeIdC data_type_{::kNumberTypeFloat32}; 2852 std::vector<float *> data_buffers_; 2853+ 2854+ private: 2855+ int ReSize() override; 2856 }; 2857 } // namespace mindspore::lite::micro::nnacl 2858 #endif // MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP32_REDUCE_FP32_CODER_H_ 2859diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/resize_fp32_coder.cc b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/resize_fp32_coder.cc 2860index a5acf689..d84d0c60 100644 2861--- a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/resize_fp32_coder.cc 2862+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/resize_fp32_coder.cc 2863@@ -68,7 +68,9 @@ int ResizeFP32Coder::ReSize() { 2864 } 2865 2866 MS_CHECK_RET_CODE_WITH_EXE(MallocTmpBuffer(), "MallocTmpBuffer failed", FreeTmpBuffer()); 2867- MS_CHECK_RET_CODE_WITH_EXE(ResizePrepare(), "ResizePrepare failed", FreeTmpBuffer()); 2868+ if (input_tensor_->data_type() == kNumberTypeFloat32 || input_tensor_->data_type() == kNumberTypeFloat) { 2869+ MS_CHECK_RET_CODE_WITH_EXE(ResizePrepare(), "ResizePrepare failed", FreeTmpBuffer()); 2870+ } 2871 2872 return RET_OK; 2873 } 2874@@ -128,8 +130,8 @@ int ResizeFP32Coder::MallocTmpBuffer() { 2875 } 2876 2877 { 2878- size_t line_buffer_size = sizeof(float) * x_len_ * input_tensor_->Channel() * kTwo * 
kMaxThreadNumSupported; 2879- line_buffer_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, line_buffer_size, kWorkspace)); 2880+ size_t line_buffer_size = DataTypeLen() * x_len_ * input_tensor_->Channel() * kTwo * kMaxThreadNumSupported; 2881+ line_buffer_ = allocator_->Malloc(kNumberTypeUInt8, line_buffer_size, kWorkspace); 2882 CHECK_MALLOC_RES(line_buffer_, RET_NULL_PTR); 2883 } 2884 return RET_OK; 2885@@ -168,12 +170,12 @@ int ResizeFP32Coder::DoCode(CoderContext *const context) { 2886 2887 switch (method_) { 2888 case static_cast<int>(schema::ResizeMethod_LINEAR): { 2889- code.CodeArray("y_bottoms", coordinate_.y_bottoms_, sizeof(int) * y_len_, true); 2890- code.CodeArray("y_tops", coordinate_.y_tops_, sizeof(int) * y_len_, true); 2891- code.CodeArray("x_lefts", coordinate_.x_lefts_, sizeof(int) * x_len_, true); 2892- code.CodeArray("x_rights", coordinate_.x_rights_, sizeof(int) * x_len_, true); 2893- code.CodeArray("y_weights", y_weights_, sizeof(float) * y_weight_len_, true); 2894- code.CodeArray("x_weights", x_weights_, sizeof(float) * x_weight_len_, true); 2895+ code.CodeArray("y_bottoms", coordinate_.y_bottoms_, y_len_, true); 2896+ code.CodeArray("y_tops", coordinate_.y_tops_, y_len_, true); 2897+ code.CodeArray("x_lefts", coordinate_.x_lefts_, x_len_, true); 2898+ code.CodeArray("x_rights", coordinate_.x_rights_, x_len_, true); 2899+ code.CodeArray("y_weights", y_weights_, y_weight_len_, true); 2900+ code.CodeArray("x_weights", x_weights_, x_weight_len_, true); 2901 2902 int c = input_tensor_->shape().at(kNHWC_C); 2903 code << "float *line0 = " << MemoryAllocator::GetInstance()->GetRuntimeAddr(line_buffer_) << ";\n"; 2904@@ -188,12 +190,13 @@ int ResizeFP32Coder::DoCode(CoderContext *const context) { 2905 break; 2906 } 2907 case static_cast<int>(schema::ResizeMethod_CUBIC): { 2908- code.CodeArray("y_tops", coordinate_.y_tops_, sizeof(int) * y_len_, true); 2909- code.CodeArray("x_lefts", coordinate_.x_lefts_, sizeof(int) * x_len_, 
true); 2910- code.CodeArray("y_weights", y_weights_, sizeof(float) * y_weight_len_, true); 2911- code.CodeArray("x_weights", x_weights_, sizeof(float) * x_weight_len_, true); 2912+ code.CodeArray("y_tops", coordinate_.y_tops_, y_len_, true); 2913+ code.CodeArray("x_lefts", coordinate_.x_lefts_, x_len_, true); 2914+ code.CodeArray("y_weights", y_weights_, y_weight_len_, true); 2915+ code.CodeArray("x_weights", x_weights_, x_weight_len_, true); 2916+ auto buffer_str = "(float *)" + MemoryAllocator::GetInstance()->GetRuntimeAddr(line_buffer_); 2917 code.CodeFunction("ResizeBicubic", input_tensor_, output_tensor_, "input_shape", "output_shape", "y_tops", 2918- "x_lefts", "y_weights", "x_weights", line_buffer_, 0, new_height_); 2919+ "x_lefts", "y_weights", "x_weights", buffer_str, 0, new_height_); 2920 break; 2921 } 2922 default: { 2923diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/resize_fp32_coder.h b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/resize_fp32_coder.h 2924index 54594c62..34dffd50 100644 2925--- a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/resize_fp32_coder.h 2926+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/resize_fp32_coder.h 2927@@ -27,7 +27,7 @@ 2928 #include "src/runtime/kernel/cpu/fp32/resize_fp32.h" 2929 2930 namespace mindspore::lite::micro::nnacl { 2931-class ResizeFP32Coder final : public ResizeBaseCoder { 2932+class ResizeFP32Coder : public ResizeBaseCoder { 2933 public: 2934 ResizeFP32Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors, 2935 const LiteGraph::Node *node, size_t node_index, Target target) 2936@@ -37,12 +37,12 @@ class ResizeFP32Coder final : public ResizeBaseCoder { 2937 int ReSize(); 2938 int DoCode(CoderContext *const context) override; 2939 2940- private: 2941+ protected: 2942 int SelectCalculatorFunc(); 2943 void CalTmpBufferLen(); 2944 int MallocTmpBuffer(); 2945 void FreeTmpBuffer(); 2946- int 
ResizePrepare(); 2947+ virtual int DataTypeLen() { return sizeof(float); } 2948 2949 ResizeCoordinate coordinate_; 2950 size_t x_len_{0}; 2951@@ -52,9 +52,12 @@ class ResizeFP32Coder final : public ResizeBaseCoder { 2952 2953 float *y_weights_{nullptr}; 2954 float *x_weights_{nullptr}; 2955- float *line_buffer_{nullptr}; 2956+ void *line_buffer_{nullptr}; 2957 CalculateOriginalCoordinate calculate_{nullptr}; 2958 std::string calculate_str_; 2959+ 2960+ private: 2961+ int ResizePrepare(); 2962 }; 2963 } // namespace mindspore::lite::micro::nnacl 2964 #endif // MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP32_RESIZE_FP32_CODER_H_ 2965diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/scale_fp32_coder.cc b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/scale_fp32_coder.cc 2966index ae28fe2a..9375a71a 100644 2967--- a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/scale_fp32_coder.cc 2968+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/scale_fp32_coder.cc 2969@@ -28,36 +28,14 @@ int ScaleFP32Coder::InitScaleOffset() { 2970 MS_CHECK_PTR(scale_tensor); 2971 if (scale_tensor->data() != nullptr) { 2972 scale_param_->const_scale_ = true; 2973- scale_ = 2974- reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, scale_tensor->Size(), kOfflinePackWeight)); 2975- MS_CHECK_PTR(scale_); 2976- MS_CHECK_TRUE(scale_tensor->Size() > 0, "invalid scale tensor size"); 2977- MS_CHECK_RET_CODE(memcpy_s(scale_, scale_tensor->Size(), scale_tensor->data(), scale_tensor->Size()), 2978- "memcpy scale failed"); 2979 } else { 2980 scale_param_->const_scale_ = false; 2981- scale_ = nullptr; 2982 } 2983 2984- if (input_tensors_.size() == DIMENSION_2D) { 2985+ if (input_tensors_.size() == DIMENSION_3D && input_tensors_.at(kBiasIndex)->data() != nullptr) { 2986 scale_param_->const_offset_ = true; 2987- offset_ = 2988- reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, 
scale_tensor->Size(), kOfflinePackWeight)); 2989- MS_CHECK_PTR(offset_); 2990- MS_CHECK_RET_CODE(memset_s(offset_, scale_tensor->Size(), 0, scale_tensor->Size()), "memset_s failed!"); 2991- } else if (input_tensors_.size() == DIMENSION_3D && input_tensors_.at(kBiasIndex)->data() != nullptr) { 2992- scale_param_->const_offset_ = true; 2993- Tensor *offset_tensor = input_tensors_.at(kBiasIndex); 2994- MS_CHECK_PTR(offset_tensor); 2995- offset_ = 2996- reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, offset_tensor->Size(), kOfflinePackWeight)); 2997- MS_CHECK_PTR(offset_); 2998- MS_CHECK_TRUE(offset_tensor->Size() > 0, "invalid offset tensor size"); 2999- MS_CHECK_RET_CODE(memcpy_s(offset_, offset_tensor->Size(), offset_tensor->data(), offset_tensor->Size()), 3000- "memcpy_s failed!"); 3001 } else { 3002 scale_param_->const_offset_ = false; 3003- offset_ = nullptr; 3004 } 3005 return RET_OK; 3006 } 3007@@ -125,25 +103,29 @@ int ScaleFP32Coder::DoCode(CoderContext *const context) { 3008 3009 NNaclFp32Serializer code; 3010 code.CodeStruct("scale_parameter", *scale_param_); 3011- 3012+ auto scale = allocator_->GetRuntimeAddr(input_tensors_.at(kWeightIndex), scale_param_->const_scale_); 3013+ std::string offset{"NULL"}; 3014+ if (input_tensors_.size() == DIMENSION_3D) { 3015+ offset = allocator_->GetRuntimeAddr(input_tensors_.at(kBiasIndex), scale_param_->const_offset_); 3016+ } 3017 switch (scale_param_->activation_type_) { 3018 case schema::ActivationType_RELU6: 3019- code.CodeFunction("DoScaleRelu6", input_tensor_, output_tensor_, scale_, offset_, kDefaultTaskId, 3020+ code.CodeFunction("DoScaleRelu6", input_tensor_, output_tensor_, scale, offset, kDefaultTaskId, 3021 "&scale_parameter"); 3022 break; 3023 case schema::ActivationType_RELU: { 3024 if (!support_parallel_) { 3025- code.CodeFunction("DoScaleRelu", input_tensor_, output_tensor_, scale_, offset_, kDefaultTaskId, 3026+ code.CodeFunction("DoScaleRelu", input_tensor_, output_tensor_, scale, 
offset, kDefaultTaskId, 3027 "&scale_parameter"); 3028 } else { 3029- code.CodeBaseStruct("ScaleFp32Args", kRunArgs, input_tensor_, output_tensor_, scale_, offset_, 3030+ code.CodeBaseStruct("ScaleFp32Args", kRunArgs, input_tensor_, output_tensor_, scale, offset, 3031 "&scale_parameter"); 3032 code.CodeFunction(kParallelLaunch, "DoScaleReluRun", kRunArgsAddr, "scale_parameter.op_parameter_.thread_num_"); 3033 } 3034 break; 3035 } 3036 case schema::ActivationType_NO_ACTIVATION: 3037- code.CodeFunction("DoScale", input_tensor_, output_tensor_, scale_, offset_, kDefaultTaskId, "&scale_parameter"); 3038+ code.CodeFunction("DoScale", input_tensor_, output_tensor_, scale, offset, kDefaultTaskId, "&scale_parameter"); 3039 break; 3040 default: 3041 MS_LOG(ERROR) << "Scale does not support activation type " << scale_param_->activation_type_; 3042diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/scale_fp32_coder.h b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/scale_fp32_coder.h 3043index 7f8e6242..319ad35a 100644 3044--- a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/scale_fp32_coder.h 3045+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/scale_fp32_coder.h 3046@@ -39,8 +39,6 @@ class ScaleFP32Coder final : public OperatorCoder { 3047 3048 private: 3049 ScaleParameter *scale_param_{nullptr}; 3050- float *scale_{nullptr}; 3051- float *offset_{nullptr}; 3052 }; 3053 } // namespace mindspore::lite::micro::nnacl 3054 #endif // MICRO_CODER_OPCODERS_FP32__CODER_H_ 3055diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/split_fp32_coder.cc b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/split_fp32_coder.cc 3056new file mode 100644 3057index 00000000..6f817386 3058--- /dev/null 3059+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/split_fp32_coder.cc 3060@@ -0,0 +1,77 @@ 3061+/** 3062+ * Copyright 2022 Huawei Technologies Co., Ltd 3063+ * 
3064+ * Licensed under the Apache License, Version 2.0 (the "License"); 3065+ * you may not use this file except in compliance with the License. 3066+ * You may obtain a copy of the License at 3067+ * 3068+ * http://www.apache.org/licenses/LICENSE-2.0 3069+ * 3070+ * Unless required by applicable law or agreed to in writing, software 3071+ * distributed under the License is distributed on an "AS IS" BASIS, 3072+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 3073+ * See the License for the specific language governing permissions and 3074+ * limitations under the License. 3075+ */ 3076+#include "coder/opcoders/nnacl/fp32/split_fp32_coder.h" 3077+#include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h" 3078+#include "coder/opcoders/file_collector.h" 3079+#include "coder/opcoders/parallel.h" 3080+#include "src/runtime/kernel/cpu/base/split_base.h" 3081+ 3082+using mindspore::schema::PrimitiveType_Split; 3083+ 3084+namespace mindspore::lite::micro::nnacl { 3085+int SplitFP32Coder::Prepare(CoderContext *const context) { 3086+ auto status = mindspore::kernel::SplitBaseCPUKernel::CheckAndInitSplitParam( 3087+ *input_tensor_, reinterpret_cast<SplitParameter *>(parameter_)); 3088+ if (RET_OK != status) { 3089+ MS_LOG(ERROR) << "CheckAndInitSplitParam failed"; 3090+ return status; 3091+ } 3092+ return RET_OK; 3093+} 3094+ 3095+int SplitFP32Coder::DoCode(CoderContext *const context) { 3096+ Collect(context, {"nnacl/base/split_base.h"}, {"split_base.c"}); 3097+ if (support_parallel_) { 3098+ Collect(context, {"wrapper/fp32/split_fp32_wrapper.h"}, {"split_fp32_wrapper.c"}); 3099+ } 3100+ auto param = reinterpret_cast<SplitParameter *>(parameter_); 3101+ int num_unit = param->split_count_ * param->num_split_; 3102+ 3103+ NNaclFp32Serializer code; 3104+ code << " void *output_ptrs[" << output_tensors_.size() << "] = {"; 3105+ for (int i = 0; i < param->num_split_; i++) { 3106+ code << 
allocator_->GetRuntimeAddr(output_tensors_.at(i)) << ","; 3107+ } 3108+ code << "};\n"; 3109+ code << " int input_dim[" << input_tensor_->shape().size() << "] = {"; 3110+ for (auto &dim : input_tensor_->shape()) { 3111+ code << dim << ","; 3112+ } 3113+ code << "};\n"; 3114+ code << " int split_sizes[" << param->num_split_ << "] = {"; 3115+ for (int i = 0; i < param->num_split_; i++) { 3116+ code << param->split_sizes_[i] << ","; 3117+ } 3118+ code << "};\n"; 3119+ 3120+ code.CodeStruct("split_param", *param); 3121+ if (!support_parallel_) { 3122+ code.CodeFunction("DoSplit", input_tensor_, "(void *)output_ptrs", "input_dim", "0", num_unit, "&split_param", 3123+ lite::DataTypeSize(input_tensor_->data_type())); 3124+ } else { 3125+ code.CodeBaseStruct("SplitFp32Args", kRunArgs, input_tensor_, "(void *)output_ptrs", "input_dim", num_unit, 3126+ lite::DataTypeSize(input_tensor_->data_type()), "&split_param"); 3127+ code.CodeFunction(kParallelLaunch, "DoSplitRun", kRunArgsAddr, "split_param.op_parameter_.thread_num_"); 3128+ } 3129+ 3130+ context->AppendCode(code.str()); 3131+ return RET_OK; 3132+} 3133+ 3134+REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_Split, CPUOpCoderCreator<SplitFP32Coder>) 3135+REG_OPERATOR_CODER(kAllTargets, kNumberTypeInt32, PrimitiveType_Split, CPUOpCoderCreator<SplitFP32Coder>) 3136+REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat16, PrimitiveType_Split, CPUOpCoderCreator<SplitFP32Coder>) 3137+} // namespace mindspore::lite::micro::nnacl 3138diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/split_fp32_coder.h b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/split_fp32_coder.h 3139new file mode 100644 3140index 00000000..f65214c1 3141--- /dev/null 3142+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/split_fp32_coder.h 3143@@ -0,0 +1,37 @@ 3144+/** 3145+ * Copyright 2022 Huawei Technologies Co., Ltd 3146+ * 3147+ * Licensed under the Apache License, Version 2.0 
(the "License"); 3148+ * you may not use this file except in compliance with the License. 3149+ * You may obtain a copy of the License at 3150+ * 3151+ * http://www.apache.org/licenses/LICENSE-2.0 3152+ * 3153+ * Unless required by applicable law or agreed to in writing, software 3154+ * distributed under the License is distributed on an "AS IS" BASIS, 3155+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 3156+ * See the License for the specific language governing permissions and 3157+ * limitations under the License. 3158+ */ 3159+ 3160+#ifndef MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP32_SPLIT_FP32_CODER_H_ 3161+#define MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP32_SPLIT_FP32_CODER_H_ 3162+ 3163+#include <vector> 3164+#include "coder/opcoders/op_coder.h" 3165+#include "nnacl/split_parameter.h" 3166+ 3167+namespace mindspore::lite::micro::nnacl { 3168+class SplitFP32Coder : public OperatorCoder { 3169+ public: 3170+ SplitFP32Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors, 3171+ const LiteGraph::Node *node, size_t node_index, Target target) 3172+ : OperatorCoder(in_tensors, out_tensors, node, node_index, target) {} 3173+ ~SplitFP32Coder() override = default; 3174+ 3175+ int Prepare(CoderContext *const context) override; 3176+ 3177+ int DoCode(CoderContext *const context) override; 3178+}; 3179+} // namespace mindspore::lite::micro::nnacl 3180+#endif // MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP32_SPLIT_FP32_CODER_H_ 3181diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/transpose_fp32_coder.cc b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/transpose_fp32_coder.cc 3182index 2512c9a7..a5882722 100644 3183--- a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/transpose_fp32_coder.cc 3184+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/transpose_fp32_coder.cc 3185@@ -48,10 
+48,6 @@ int TransposeFp32Coder::Resize() { 3186 param_->out_strides_[i] = out_shape.at(i + 1) * param_->out_strides_[i + 1]; 3187 } 3188 3189- out_shape_ = 3190- reinterpret_cast<int *>(allocator_->Malloc(kNumberTypeInt32, out_shape.size() * sizeof(int), kOfflinePackWeight)); 3191- MS_CHECK_PTR(out_shape_); 3192- memcpy(out_shape_, out_shape.data(), in_shape.size() * sizeof(int)); 3193 return RET_OK; 3194 } 3195 3196@@ -141,7 +137,9 @@ int TransposeFp32Coder::DoCode(CoderContext *const context) { 3197 } 3198 3199 code.CodeStruct("trans_param", *param_); 3200- dims_ = output_tensor_->shape().size(); 3201+ auto out_shape = output_tensor_->shape(); 3202+ dims_ = static_cast<int>(out_shape.size()); 3203+ code.CodeArray("output_shape", out_shape.data(), dims_, true); 3204 if (dims_ > MAX_TRANSPOSE_DIM_SIZE) { 3205 int *dim_size = reinterpret_cast<int *>(malloc(dims_ * sizeof(int))); 3206 if (dim_size == nullptr) { 3207@@ -149,7 +147,7 @@ int TransposeFp32Coder::DoCode(CoderContext *const context) { 3208 } 3209 *(dim_size + dims_ - 1) = 1; 3210 for (int i = dims_ - 1; i > 0; --i) { 3211- *(dim_size + i - 1) = *(dim_size + i) * out_shape_[i]; 3212+ *(dim_size + i - 1) = *(dim_size + i) * out_shape[i]; 3213 } 3214 code.CodeArray("dim_size", dim_size, dims_); 3215 int *position = reinterpret_cast<int *>(malloc(dims_ * thread_num_ * sizeof(int))); 3216@@ -158,12 +156,12 @@ int TransposeFp32Coder::DoCode(CoderContext *const context) { 3217 return RET_NULL_PTR; 3218 } 3219 code.CodeArray("position", position, dims_ * thread_num_); 3220- code.CodeFunction("TransposeDimsFp32", input_tensor_, output_tensor_, out_shape_, "dim_size", "position", 3221+ code.CodeFunction("TransposeDimsFp32", input_tensor_, output_tensor_, "output_shape", "dim_size", "position", 3222 "&trans_param", kDefaultTaskId, thread_num_); 3223 free(dim_size); 3224 free(position); 3225 } else { 3226- code.CodeFunction("DoTransposeFp32", input_tensor_, output_tensor_, out_shape_, "&trans_param"); 3227+ 
code.CodeFunction("DoTransposeFp32", input_tensor_, output_tensor_, "output_shape", "&trans_param"); 3228 } 3229 context->AppendCode(code.str()); 3230 return RET_OK; 3231diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/transpose_fp32_coder.h b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/transpose_fp32_coder.h 3232index c4cd37a9..0fdbb407 100644 3233--- a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/transpose_fp32_coder.h 3234+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/transpose_fp32_coder.h 3235@@ -21,7 +21,7 @@ 3236 #include "coder/opcoders/op_coder.h" 3237 #include "nnacl/transpose.h" 3238 namespace mindspore::lite::micro::nnacl { 3239-class TransposeFp32Coder final : public OperatorCoder { 3240+class TransposeFp32Coder : public OperatorCoder { 3241 public: 3242 TransposeFp32Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors, 3243 const LiteGraph::Node *node, size_t node_index, Target target) 3244@@ -33,18 +33,18 @@ class TransposeFp32Coder final : public OperatorCoder { 3245 3246 int DoCode(CoderContext *const context) override; 3247 3248- int Resize(); 3249+ virtual int Resize(); 3250 3251 int Init(); 3252 3253+ protected: 3254+ TransposeParameter *param_{nullptr}; 3255+ int dims_{0}; 3256+ 3257 private: 3258 void GetNHNCTransposeFunc(); 3259- 3260- TransposeParameter *param_{nullptr}; 3261- int *out_shape_{nullptr}; 3262 std::string NHNCTransposeFunc_; 3263 int nhnc_param_[3]; 3264- int dims_{0}; 3265 }; 3266 } // namespace mindspore::lite::micro::nnacl 3267 #endif // MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP32_TRANSPOSE_FP32_CODER_H_ 3268diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.cc b/mindspore/lite/tools/converter/micro/coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.cc 3269index 49314886..c333b621 100644 3270--- 
a/mindspore/lite/tools/converter/micro/coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.cc 3271+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.cc 3272@@ -177,6 +177,25 @@ void NNaclFp32Serializer::CodeStruct(const std::string &name, const OpParameter 3273 op_param.is_train_session_, op_param.is_zero_shape_); 3274 } 3275 3276+void NNaclFp32Serializer::CodeStruct(const std::string &name, const SplitParameter &split_parameter) { 3277+ CodeBaseStruct("SplitParameter", name, split_parameter.op_parameter_, split_parameter.num_split_, "split_sizes", 3278+ split_parameter.split_dim_, ToString(split_parameter.strides_), "{0}", split_parameter.n_dims_, 3279+ split_parameter.split_count_); 3280+} 3281+ 3282+void NNaclFp32Serializer::CodeStruct(const std::string &name, const LayerNormParameter &op_param) { 3283+ CodeBaseStruct<false>("LayerNormParameter", name, op_param.op_parameter_, op_param.epsilon_, 3284+ op_param.elementwise_mode_, op_param.elementwise_affine_, op_param.begin_norm_axis_, 3285+ op_param.begin_params_axis_, op_param.norm_inner_size_, op_param.norm_outer_size_, 3286+ op_param.params_inner_size_, op_param.params_outer_size_, op_param.normalized_dims_, 3287+ ToString(op_param.normalized_shape_), op_param.thread_count_, op_param.thread_outsize_); 3288+} 3289+ 3290+void NNaclFp32Serializer::CodeStruct(const std::string &name, const BroadcastShapeInfo &op_param) { 3291+ CodeBaseStruct<false>("BroadcastShapeInfo", name, ToString(op_param.input_shape_), op_param.input_shape_size_, 3292+ ToString(op_param.output_shape_), op_param.output_shape_size_); 3293+} 3294+ 3295 void NNaclFp32Serializer::CodeArrayStruct(const std::string &name, TensorC *tensorC, std::vector<Tensor *> tensor) { 3296 std::vector<std::string> tensor_names; 3297 int size = tensor.size(); 3298diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h 
b/mindspore/lite/tools/converter/micro/coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h 3299index 8e1350f5..f52ced20 100644 3300--- a/mindspore/lite/tools/converter/micro/coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h 3301+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h 3302@@ -41,6 +41,9 @@ 3303 #include "wrapper/fp32/arithmetic_fp32_wrapper.h" 3304 #include "wrapper/base/affine_wrapper.h" 3305 #include "wrapper/fp32/conv_winograd_fp32_wrapper.h" 3306+#include "nnacl/layer_norm_parameter.h" 3307+#include "nnacl/broadcast_to_parameter.h" 3308+#include "nnacl/split_parameter.h" 3309 3310 namespace mindspore::lite::micro::nnacl { 3311 class NNaclFp32Serializer : public Serializer { 3312@@ -68,6 +71,9 @@ class NNaclFp32Serializer : public Serializer { 3313 void CodeStruct(const std::string &name, const GroupNormParameter &gn_param); 3314 void CodeStruct(const std::string &name, const ActivationParameter &activation_parameter); 3315 void CodeStruct(const std::string &name, const OpParameter &op_param); 3316+ void CodeStruct(const std::string &name, const SplitParameter &split_parameter); 3317+ void CodeStruct(const std::string &name, const LayerNormParameter ¶m); 3318+ void CodeStruct(const std::string &name, const BroadcastShapeInfo ¶m); 3319 void CodeArrayStruct(const std::string &name, TensorC *tensorC, std::vector<Tensor *> tensor); 3320 3321 private: 3322diff --git a/mindspore/lite/tools/converter/micro/coder/session.cc b/mindspore/lite/tools/converter/micro/coder/session.cc 3323index 10dc2dc6..471f1491 100644 3324--- a/mindspore/lite/tools/converter/micro/coder/session.cc 3325+++ b/mindspore/lite/tools/converter/micro/coder/session.cc 3326@@ -119,12 +119,13 @@ int CoderSession::GenerateCode() { 3327 return ret; 3328 } 3329 3330-int CoderSession::Init(const void *content, int size) { 3331+int CoderSession::Init(const void *content, int size, bool enableFp16) { 
3332 MS_LOG(INFO) << "CoderSession::Init start"; 3333 Model *model = lite::Model::Import(static_cast<const char *>(content), size); 3334 MS_CHECK_PTR(model); 3335 coder_graph_ = std::make_unique<CoderGraph>(model); 3336 context_ = std::make_unique<CoderContext>(); 3337+ enableFp16_ = enableFp16; 3338 MS_LOG(INFO) << "CoderSession::Init done"; 3339 return RET_OK; 3340 } 3341@@ -299,7 +300,7 @@ int CoderSession::CreateOpCoders() { 3342 } 3343 3344 int CoderSession::InitCodeGraph() { 3345- MS_CHECK_RET_CODE(coder_graph_->ConvertTensors(), "convert tensors failed"); 3346+ MS_CHECK_RET_CODE(coder_graph_->ConvertTensors(enableFp16_), "convert tensors failed"); 3347 MS_CHECK_RET_CODE(coder_graph_->InitGraphInOutTensors(), "init graph inputs and outputs failed"); 3348 return RET_OK; 3349 } 3350diff --git a/mindspore/lite/tools/converter/micro/coder/session.h b/mindspore/lite/tools/converter/micro/coder/session.h 3351index f1039af0..3a8f7290 100644 3352--- a/mindspore/lite/tools/converter/micro/coder/session.h 3353+++ b/mindspore/lite/tools/converter/micro/coder/session.h 3354@@ -34,7 +34,7 @@ class CoderSession { 3355 3356 ~CoderSession(); 3357 3358- int Init(const void *content, int size); 3359+ int Init(const void *content, int size, bool enableFp16); 3360 3361 int Build(); 3362 3363@@ -57,6 +57,7 @@ class CoderSession { 3364 MemoryAllocator *allocator_{nullptr}; 3365 std::vector<std::unique_ptr<OperatorCoder>> op_coders_; 3366 int schema_version_ = SCHEMA_VERSION::SCHEMA_CUR; 3367+ bool enableFp16_{false}; 3368 }; 3369 3370 std::shared_ptr<CoderSession> CreateCoderSession(); 3371-- 33722.17.1 3373 3374