• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1From 6576d2861ab7e95c7e7b6a284ebc5f3159f4398d Mon Sep 17 00:00:00 2001
2From: z00574805 <z00574805@notesmail.huawei.com>
3Date: Wed, 24 May 2023 11:04:47 +0800
4Subject: [PATCH 2/5] xiaoyi-0002
5
6---
7 mindspore/lite/tools/converter/converter.cc   |   2 +-
8 .../converter/micro/cmake/file_list.cmake     |  13 +
9 .../micro/coder/allocator/allocator.cc        |  19 +-
10 .../micro/coder/allocator/allocator.h         |   9 +-
11 .../lite/tools/converter/micro/coder/coder.cc |   8 +-
12 .../lite/tools/converter/micro/coder/coder.h  |   4 +-
13 .../lite/tools/converter/micro/coder/graph.cc |  30 +-
14 .../lite/tools/converter/micro/coder/graph.h  |   2 +-
15 .../nnacl/fp16/activation_fp16_coder.cc       |  84 +++++
16 .../nnacl/fp16/activation_fp16_coder.h        |  37 +++
17 .../nnacl/fp16/arithmetic_fp16_coder.cc       | 175 +++++++++++
18 .../nnacl/fp16/arithmetic_fp16_coder.h        |  47 +++
19 .../nnacl/fp16/avg_pooling_fp16_coder.cc      |  87 ++++++
20 .../nnacl/fp16/avg_pooling_fp16_coder.h       |  36 +++
21 .../opcoders/nnacl/fp16/concat_fp16_coder.cc  |  88 ++++++
22 .../opcoders/nnacl/fp16/concat_fp16_coder.h   |  42 +++
23 .../nnacl/fp16/layernorm_fp16_coder.cc        |  56 ++++
24 .../nnacl/fp16/layernorm_fp16_coder.h         |  37 +++
25 .../nnacl/fp16/matmul_fp16_base_coder.cc      | 286 ++++++++++++++++++
26 .../nnacl/fp16/matmul_fp16_base_coder.h       |  58 ++++
27 .../opcoders/nnacl/fp16/matmul_fp16_coder.cc  |  79 +++++
28 .../opcoders/nnacl/fp16/matmul_fp16_coder.h   |  44 +++
29 .../opcoders/nnacl/fp16/reduce_fp16_coder.cc  |  75 +++++
30 .../opcoders/nnacl/fp16/reduce_fp16_coder.h   |  40 +++
31 .../opcoders/nnacl/fp16/resize_fp16_coder.cc  | 108 +++++++
32 .../opcoders/nnacl/fp16/resize_fp16_coder.h   |  41 +++
33 .../nnacl/fp16/transpose_fp16_coder.cc        | 140 +++++++++
34 .../nnacl/fp16/transpose_fp16_coder.h         |  43 +++
35 .../nnacl/fp32/activation_fp32_coder.cc       |   8 +-
36 .../nnacl/fp32/activation_fp32_coder.h        |   2 +-
37 .../nnacl/fp32/arithmetic_fp32_coder.cc       |  38 ++-
38 .../nnacl/fp32/arithmetic_fp32_coder.h        |  34 ++-
39 .../opcoders/nnacl/fp32/concat_fp32_coder.h   |   2 +-
40 .../opcoders/nnacl/fp32/exp_fp32_coder.cc     |   2 +-
41 .../opcoders/nnacl/fp32/gather_fp32_coder.cc  |   2 +
42 .../nnacl/fp32/layernorm_fp32_coder.cc        |  81 +++++
43 .../nnacl/fp32/layernorm_fp32_coder.h         |  40 +++
44 .../opcoders/nnacl/fp32/lstm_fp32_coder.cc    |  18 +-
45 .../nnacl/fp32/matmul_fp32_base_coder.cc      | 104 ++++---
46 .../nnacl/fp32/matmul_fp32_base_coder.h       |  29 +-
47 .../opcoders/nnacl/fp32/pooling_fp32_coder.h  |   2 +-
48 .../opcoders/nnacl/fp32/reduce_fp32_coder.cc  |  40 +--
49 .../opcoders/nnacl/fp32/reduce_fp32_coder.h   |  14 +-
50 .../opcoders/nnacl/fp32/resize_fp32_coder.cc  |  31 +-
51 .../opcoders/nnacl/fp32/resize_fp32_coder.h   |  11 +-
52 .../opcoders/nnacl/fp32/scale_fp32_coder.cc   |  38 +--
53 .../opcoders/nnacl/fp32/scale_fp32_coder.h    |   2 -
54 .../opcoders/nnacl/fp32/split_fp32_coder.cc   |  77 +++++
55 .../opcoders/nnacl/fp32/split_fp32_coder.h    |  37 +++
56 .../nnacl/fp32/transpose_fp32_coder.cc        |  14 +-
57 .../nnacl/fp32/transpose_fp32_coder.h         |  12 +-
58 .../nnacl_serializer/nnacl_fp32_serializer.cc |  19 ++
59 .../nnacl_serializer/nnacl_fp32_serializer.h  |   6 +
60 .../tools/converter/micro/coder/session.cc    |   5 +-
61 .../tools/converter/micro/coder/session.h     |   3 +-
62 55 files changed, 2140 insertions(+), 221 deletions(-)
63 create mode 100644 mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/activation_fp16_coder.cc
64 create mode 100644 mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/activation_fp16_coder.h
65 create mode 100644 mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/arithmetic_fp16_coder.cc
66 create mode 100644 mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/arithmetic_fp16_coder.h
67 create mode 100644 mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/avg_pooling_fp16_coder.cc
68 create mode 100644 mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/avg_pooling_fp16_coder.h
69 create mode 100644 mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/concat_fp16_coder.cc
70 create mode 100644 mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/concat_fp16_coder.h
71 create mode 100644 mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/layernorm_fp16_coder.cc
72 create mode 100644 mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/layernorm_fp16_coder.h
73 create mode 100644 mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/matmul_fp16_base_coder.cc
74 create mode 100644 mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/matmul_fp16_base_coder.h
75 create mode 100644 mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/matmul_fp16_coder.cc
76 create mode 100644 mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/matmul_fp16_coder.h
77 create mode 100644 mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/reduce_fp16_coder.cc
78 create mode 100644 mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/reduce_fp16_coder.h
79 create mode 100644 mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/resize_fp16_coder.cc
80 create mode 100644 mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/resize_fp16_coder.h
81 create mode 100644 mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/transpose_fp16_coder.cc
82 create mode 100644 mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/transpose_fp16_coder.h
83 create mode 100644 mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/layernorm_fp32_coder.cc
84 create mode 100644 mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/layernorm_fp32_coder.h
85 create mode 100644 mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/split_fp32_coder.cc
86 create mode 100644 mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/split_fp32_coder.h
87
88diff --git a/mindspore/lite/tools/converter/converter.cc b/mindspore/lite/tools/converter/converter.cc
89index eaa18d6b..944ed29c 100644
90--- a/mindspore/lite/tools/converter/converter.cc
91+++ b/mindspore/lite/tools/converter/converter.cc
92@@ -799,7 +799,7 @@ int RunConverter(const std::shared_ptr<ConverterPara> &param, void **model_data,
93   if (param->microParam.enable_micro) {
94     status = micro::Coder::MicroSourceCodeGeneration(*meta_graph, param->output_file, param->microParam.codegen_mode,
95                                                      param->microParam.target, param->microParam.support_parallel,
96-                                                     param->microParam.debug_mode);
97+                                                     param->microParam.debug_mode, param->weight_fp16);
98     if (status != RET_OK) {
99       delete meta_graph;
100       CONVERTER_LOG_ERROR("MICRO CODEGEN FAILED:" << status << " " << GetErrorInfo(status));
101diff --git a/mindspore/lite/tools/converter/micro/cmake/file_list.cmake b/mindspore/lite/tools/converter/micro/cmake/file_list.cmake
102index 843b523e..9ae54538 100644
103--- a/mindspore/lite/tools/converter/micro/cmake/file_list.cmake
104+++ b/mindspore/lite/tools/converter/micro/cmake/file_list.cmake
105@@ -66,6 +66,17 @@ set(CODER_OPCODERS_SRC
106         ${MICRO_DIR}/coder/opcoders/cmsis-nn/int8/pooling_int8_coder.cc
107         ${MICRO_DIR}/coder/opcoders/cmsis-nn/int8/reshape_int8_coder.cc
108         ${MICRO_DIR}/coder/opcoders/cmsis-nn/int8/softmax_int8_coder.cc
109+        #### nnacl fp16 coder
110+        ${MICRO_DIR}/coder/opcoders/nnacl/fp16/activation_fp16_coder.cc
111+        ${MICRO_DIR}/coder/opcoders/nnacl/fp16/arithmetic_fp16_coder.cc
112+        ${MICRO_DIR}/coder/opcoders/nnacl/fp16/avg_pooling_fp16_coder.cc
113+        ${MICRO_DIR}/coder/opcoders/nnacl/fp16/concat_fp16_coder.cc
114+        ${MICRO_DIR}/coder/opcoders/nnacl/fp16/transpose_fp16_coder.cc
115+        ${MICRO_DIR}/coder/opcoders/nnacl/fp16/matmul_fp16_coder.cc
116+        ${MICRO_DIR}/coder/opcoders/nnacl/fp16/matmul_fp16_base_coder.cc
117+        ${MICRO_DIR}/coder/opcoders/nnacl/fp16/layernorm_fp16_coder.cc
118+        ${MICRO_DIR}/coder/opcoders/nnacl/fp16/reduce_fp16_coder.cc
119+        ${MICRO_DIR}/coder/opcoders/nnacl/fp16/resize_fp16_coder.cc
120         #### nnacl fp32 coder
121         ${MICRO_DIR}/coder/opcoders/nnacl/fp32/activation_fp32_coder.cc
122         ${MICRO_DIR}/coder/opcoders/nnacl/fp32/addn_fp32_coder.cc
123@@ -99,6 +110,8 @@ set(CODER_OPCODERS_SRC
124         ${MICRO_DIR}/coder/opcoders/nnacl/fp32/exp_fp32_coder.cc
125         ${MICRO_DIR}/coder/opcoders/nnacl/fp32/deconv2d_fp32_coder.cc
126         ${MICRO_DIR}/coder/opcoders/nnacl/fp32/prelu_fp32_coder.cc
127+        ${MICRO_DIR}/coder/opcoders/nnacl/fp32/layernorm_fp32_coder.cc
128+        ${MICRO_DIR}/coder/opcoders/nnacl/fp32/split_fp32_coder.cc
129         #### nnacl int8 coder
130         ${MICRO_DIR}/coder/opcoders/nnacl/int8/activation_int8_coder.cc
131         ${MICRO_DIR}/coder/opcoders/nnacl/int8/affine_int8_coder.cc
132diff --git a/mindspore/lite/tools/converter/micro/coder/allocator/allocator.cc b/mindspore/lite/tools/converter/micro/coder/allocator/allocator.cc
133index 99ef06de..9c5839b4 100644
134--- a/mindspore/lite/tools/converter/micro/coder/allocator/allocator.cc
135+++ b/mindspore/lite/tools/converter/micro/coder/allocator/allocator.cc
136@@ -22,11 +22,13 @@
137
138 namespace mindspore::lite::micro {
139 namespace {
140-const std::map<TypeId, size_t> size_map = {{kNumberTypeFloat, sizeof(float)},   {kNumberTypeFloat32, sizeof(float)},
141-                                           {kNumberTypeInt32, sizeof(int32_t)}, {kNumberTypeInt16, sizeof(int16_t)},
142-                                           {kNumberTypeInt8, sizeof(int8_t)},   {kNumberTypeUInt8, sizeof(uint8_t)}};
143+const std::map<TypeId, size_t> size_map = {{kNumberTypeFloat, sizeof(float)},      {kNumberTypeFloat32, sizeof(float)},
144+                                           {kNumberTypeInt32, sizeof(int32_t)},    {kNumberTypeInt16, sizeof(int16_t)},
145+                                           {kNumberTypeFloat16, sizeof(uint16_t)}, {kNumberTypeInt8, sizeof(int8_t)},
146+                                           {kNumberTypeUInt8, sizeof(uint8_t)}};
147 }
148-void *MemoryAllocator::MallocWeightTensor(TypeId type_id, size_t size, MallocType type) {
149+void *MemoryAllocator::MallocWeightTensor(TypeId type_id, size_t size, MallocType type,
150+                                          const std::string &tensor_name) {
151   auto item = size_map.find(type_id);
152   MS_CHECK_TRUE_RET_NULL(item != size_map.end(), "unsupported type idnex");
153
154@@ -36,6 +38,7 @@ void *MemoryAllocator::MallocWeightTensor(TypeId type_id, size_t size, MallocTyp
155   auto cate = type == kOfflinePackWeight ? lite::Category::CONST_TENSOR : lite::Category::VAR;
156   Tensor *weight = new (std::nothrow) lite::Tensor(type_id, shape, mindspore::NHWC, cate);
157   MS_CHECK_PTR_RET_NULL(weight);
158+  weight->set_tensor_name(tensor_name);
159   std::string runtime_addr = kWeightPrefixName + std::to_string(weight_index_++);
160   malloc_weights_addr_.insert(std::make_pair(weight, runtime_addr));
161   if (type == kOfflinePackWeight) {
162@@ -152,4 +155,12 @@ int MemoryAllocator::Assign(const std::vector<Tensor *> &inputs,
163   RecordOriginWeightsAddr(nodes);
164   return AssignTensors(nodes);
165 }
166+
167+void MemoryAllocator::MarkSharedWeight(const Tensor *src, void *pack_weight) {
168+  shared_pack_weights_[src] = pack_weight;
169+}
170+
171+void *MemoryAllocator::GetSharedWeightAddr(const Tensor *src) {
172+  return shared_pack_weights_.find(src) == shared_pack_weights_.end() ? nullptr : shared_pack_weights_[src];
173+}
174 }  // namespace mindspore::lite::micro
175diff --git a/mindspore/lite/tools/converter/micro/coder/allocator/allocator.h b/mindspore/lite/tools/converter/micro/coder/allocator/allocator.h
176index f8decca1..8a1331fb 100644
177--- a/mindspore/lite/tools/converter/micro/coder/allocator/allocator.h
178+++ b/mindspore/lite/tools/converter/micro/coder/allocator/allocator.h
179@@ -65,9 +65,9 @@ class MemoryAllocator {
180    * in view of weight, bias and workspace
181    */
182
183-  void *Malloc(TypeId type_id, size_t size, MallocType type) {
184+  void *Malloc(TypeId type_id, size_t size, MallocType type, const std::string &tensor_name = "") {
185     if (type != kWorkspace) {
186-      return MallocWeightTensor(type_id, size, type);
187+      return MallocWeightTensor(type_id, size, type, tensor_name);
188     }
189     if (size == 0 || size >= UINT_MAX) {
190       return nullptr;
191@@ -138,7 +138,9 @@ class MemoryAllocator {
192   std::map<std::string, Tensor *> saved_weights() const { return saved_weights_addr_; }
193   size_t total_buffer_size() const { return tensors_size_ + workspace_size_; }
194   void enable_is_next() { is_next_ = true; }
195-  void *MallocWeightTensor(TypeId type_id, size_t size, MallocType type);
196+  void *MallocWeightTensor(TypeId type_id, size_t size, MallocType type, const std::string &tensor_name = "");
197+  void MarkSharedWeight(const Tensor *src, void *pack_weight);
198+  void *GetSharedWeightAddr(const Tensor *src);
199
200  private:
201   int AssignTensors(const std::vector<std::unique_ptr<OperatorCoder>> &nodes);
202@@ -162,6 +164,7 @@ class MemoryAllocator {
203   std::map<Tensor *, std::string> origin_weights_addr_;
204   std::map<Tensor *, std::string> malloc_weights_addr_;
205   std::map<Tensor *, std::string> tensors_addr_;
206+  std::map<const Tensor *, void *> shared_pack_weights_;
207 };
208 }  // namespace mindspore::lite::micro
209 #endif  // MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_ALLOCATOR_ALLOCATOR_H_
210diff --git a/mindspore/lite/tools/converter/micro/coder/coder.cc b/mindspore/lite/tools/converter/micro/coder/coder.cc
211index 9711d4b9..cca4687e 100644
212--- a/mindspore/lite/tools/converter/micro/coder/coder.cc
213+++ b/mindspore/lite/tools/converter/micro/coder/coder.cc
214@@ -29,13 +29,13 @@
215 #include "tools/converter/micro/coder/generator/component/component.h"
216
217 namespace mindspore::lite::micro {
218-int Coder::Run(const void *model_buff, size_t size) {
219+int Coder::Run(const void *model_buff, size_t size, bool enableFp16) {
220   session_ = CreateCoderSession();
221   if (session_ == nullptr) {
222     MS_LOG(ERROR) << "new session failed while running!";
223     return RET_ERROR;
224   }
225-  STATUS status = session_->Init(model_buff, size);
226+  STATUS status = session_->Init(model_buff, size, enableFp16);
227   if (status != RET_OK) {
228     MS_LOG(ERROR) << "Init session failed!";
229     return RET_ERROR;
230@@ -94,7 +94,7 @@ bool Coder::InitPath(const std::string &output_path) {
231
232 int Coder::MicroSourceCodeGeneration(const schema::MetaGraphT &graph, const std::string &output_path,
233                                      const std::string &codegen_mode, const std::string &device, bool support_parallel,
234-                                     bool debug_mode) {
235+                                     bool debug_mode, bool enableFp16) {
236   flatbuffers::FlatBufferBuilder builder(kFlatbuffersBuilderInitSize);
237   auto offset = schema::MetaGraph::Pack(builder, &graph);
238   builder.Finish(offset);
239@@ -111,7 +111,7 @@ int Coder::MicroSourceCodeGeneration(const schema::MetaGraphT &graph, const std:
240     MS_LOG(ERROR) << "Codegen init Error";
241     return RET_ERROR;
242   }
243-  status = code_gen.Run(builder.GetBufferPointer(), size);
244+  status = code_gen.Run(builder.GetBufferPointer(), size, enableFp16);
245   if (status != RET_OK) {
246     MS_LOG(ERROR) << "Codegen Run Error";
247     return RET_ERROR;
248diff --git a/mindspore/lite/tools/converter/micro/coder/coder.h b/mindspore/lite/tools/converter/micro/coder/coder.h
249index 42ba153f..96531e6f 100644
250--- a/mindspore/lite/tools/converter/micro/coder/coder.h
251+++ b/mindspore/lite/tools/converter/micro/coder/coder.h
252@@ -32,11 +32,11 @@ class Coder final {
253   ~Coder() = default;
254   static int MicroSourceCodeGeneration(const schema::MetaGraphT &graph, const std::string &output_path,
255                                        const std::string &codegen_mode, const std::string &device,
256-                                       bool support_parallel, bool debug_mode);
257+                                       bool support_parallel, bool debug_mode, bool enableFp16);
258
259  private:
260   int Init(const std::string code_mode, const std::string target, bool support_parallel, bool debug_mode_) const;
261-  int Run(const void *model_buff, size_t size);
262+  int Run(const void *model_buff, size_t size, bool enableFp16);
263   bool InitPath(const std::string &output_path);
264   std::shared_ptr<CoderSession> session_{nullptr};
265
266diff --git a/mindspore/lite/tools/converter/micro/coder/graph.cc b/mindspore/lite/tools/converter/micro/coder/graph.cc
267index b9fa5f2f..ee45d042 100644
268--- a/mindspore/lite/tools/converter/micro/coder/graph.cc
269+++ b/mindspore/lite/tools/converter/micro/coder/graph.cc
270@@ -28,6 +28,7 @@
271 #include "securec/include/securec.h"
272 #include "src/common/prim_util.h"
273 #include "src/runtime/lite_model.h"
274+#include "base/float16.h"
275
276 namespace mindspore::lite::micro {
277 CoderGraph::~CoderGraph() {
278@@ -41,7 +42,7 @@ CoderGraph::~CoderGraph() {
279   }
280 }
281
282-int CoderGraph::ConvertTensors() {
283+int CoderGraph::ConvertTensors(bool enableFp16) {
284   if (model_ == nullptr) {
285     MS_LOG(ERROR) << "Graph model is nullptr";
286     return RET_ERROR;
287@@ -86,14 +87,27 @@ int CoderGraph::ConvertTensors() {
288     if (origin_tensor->nodeType() == NodeType_ValueNode && origin_tensor->data() != nullptr &&
289         origin_tensor->data()->size() > 0) {
290       // copy data, this is weight && bias
291-      MS_CHECK_TRUE_WITH_EXE(origin_tensor->data()->size() > 0, "invalid meta_tensor data size.", delete dstTensor);
292-      auto data_size = static_cast<size_t>(origin_tensor->data()->size());
293-      MS_CHECK_RET_CODE_WITH_EXE(dstTensor->MallocData(), "dst tensor malloc data failed!", delete dstTensor);
294-      void *dst_data = dstTensor->data();
295-      MS_CHECK_RET_CODE_WITH_EXE(memcpy_s(dst_data, dstTensor->Size(), origin_tensor->data()->data(), data_size),
296-                                 "memcpy_s copy data failed!", delete dstTensor);
297-      dstTensor->set_data(dst_data);
298+      if (enableFp16 && origin_data_type == kNumberTypeFloat32) {
299+        dstTensor->set_data_type(kNumberTypeFloat16);
300+        auto data = dstTensor->MutableData();
301+        MS_CHECK_TRUE_WITH_EXE(data != nullptr, "dst tensor malloc data failed!", delete dstTensor);
302+        auto fp32_data = reinterpret_cast<const float *>(origin_tensor->data()->data());
303+        auto fp16_data = reinterpret_cast<float16 *>(data);
304+        CHECK_NULL_RETURN(fp32_data);
305+        CHECK_NULL_RETURN(fp16_data);
306+        for (int64_t j = 0; j < dstTensor->ElementsNum(); ++j) {
307+          fp16_data[j] = float16(fp32_data[j]);
308+        }
309+
310+      } else {
311+        MS_CHECK_RET_CODE_WITH_EXE(memcpy_s(dstTensor->MutableData(), dstTensor->Size(), origin_tensor->data()->data(),
312+                                            origin_tensor->data()->size()),
313+                                   "memcpy_s copy data failed!", delete dstTensor);
314+      }
315+    } else if (enableFp16 && origin_data_type == kNumberTypeFloat32) {
316+      dstTensor->set_data_type(kNumberTypeFloat16);
317     }
318+
319     if (origin_tensor->name() != nullptr) {
320       dstTensor->set_tensor_name(origin_tensor->name()->str());
321     }
322diff --git a/mindspore/lite/tools/converter/micro/coder/graph.h b/mindspore/lite/tools/converter/micro/coder/graph.h
323index 5fb22f15..35d3cb2b 100644
324--- a/mindspore/lite/tools/converter/micro/coder/graph.h
325+++ b/mindspore/lite/tools/converter/micro/coder/graph.h
326@@ -35,7 +35,7 @@ class CoderGraph {
327   explicit CoderGraph(Model *model) : model_(model) {}
328   ~CoderGraph();
329
330-  int ConvertTensors();
331+  int ConvertTensors(bool enableFp16);
332   int InitGraphInOutTensors();
333
334   void SetAllTensors(const std::vector<Tensor *> &all_tensors);
335diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/activation_fp16_coder.cc b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/activation_fp16_coder.cc
336new file mode 100644
337index 00000000..0fdf0a7f
338--- /dev/null
339+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/activation_fp16_coder.cc
340@@ -0,0 +1,84 @@
341+/**
342+ * Copyright 2023 Huawei Technologies Co., Ltd
343+ *
344+ * Licensed under the Apache License, Version 2.0 (the "License");
345+ * you may not use this file except in compliance with the License.
346+ * You may obtain a copy of the License at
347+ *
348+ * http://www.apache.org/licenses/LICENSE-2.0
349+ *
350+ * Unless required by applicable law or agreed to in writing, software
351+ * distributed under the License is distributed on an "AS IS" BASIS,
352+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
353+ * See the License for the specific language governing permissions and
354+ * limitations under the License.
355+ */
356+#include "coder/opcoders/nnacl/fp16/activation_fp16_coder.h"
357+#include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"
358+#include "coder/opcoders/file_collector.h"
359+
360+using mindspore::schema::PrimitiveType_Activation;
361+
362+namespace mindspore::lite::micro::nnacl {
363+int ActivationFP16Coder::Prepare(CoderContext *const context) {
364+  if (input_tensor_->data_type() != kNumberTypeFloat16) {
365+    MS_LOG(INFO) << "Input tensor data type is invalid";
366+    return RET_INPUT_PARAM_INVALID;
367+  }
368+  return RET_OK;
369+}
370+
371+int ActivationFP16Coder::DoCode(CoderContext *const context) {
372+  // attribute
373+  auto *activation_parameter = reinterpret_cast<ActivationParameter *>(parameter_);
374+  MS_CHECK_PTR(activation_parameter);
375+  int count = input_tensor_->ElementsNum();
376+  Collect(context,
377+          {
378+            "nnacl/fp16/activation_fp16.h",
379+          },
380+          {
381+            "activation_fp16.c",
382+          });
383+  NNaclFp32Serializer code;
384+
385+  switch (activation_parameter->type_) {
386+    case schema::ActivationType_RELU:
387+      code.CodeFunction("ReluFp16", input_tensor_, output_tensor_, count);
388+      break;
389+    case schema::ActivationType_RELU6:
390+      code.CodeFunction("Relu6Fp16", input_tensor_, output_tensor_, count);
391+      break;
392+    case schema::ActivationType_LEAKY_RELU:
393+      code.CodeFunction("LReluFp16", input_tensor_, output_tensor_, count, activation_parameter->alpha_);
394+      break;
395+    case schema::ActivationType_SIGMOID:
396+      code.CodeFunction("SigmoidFp16", input_tensor_, output_tensor_, count);
397+      break;
398+    case schema::ActivationType_TANH:
399+      code.CodeFunction("TanhFp16", input_tensor_, output_tensor_, count);
400+      break;
401+    case schema::ActivationType_HSWISH:
402+      code.CodeFunction("HSwishFp16", input_tensor_, output_tensor_, count);
403+      break;
404+    case schema::ActivationType_SWISH:
405+      code.CodeFunction("SwishFp16", input_tensor_, output_tensor_, count);
406+      break;
407+    case schema::ActivationType_HSIGMOID:
408+      code.CodeFunction("HSigmoidFp16", input_tensor_, output_tensor_, count);
409+      break;
410+    case schema::ActivationType_ELU:
411+      code.CodeFunction("EluFp16", input_tensor_, output_tensor_, count, activation_parameter->alpha_);
412+      break;
413+    default:
414+      MS_LOG(ERROR) << "Activation type error";
415+      return RET_ERROR;
416+  }
417+  MS_LOG(DEBUG) << "ActivationFP16Code has been called";
418+  context->AppendCode(code.str());
419+  return lite::RET_OK;
420+}
421+
422+REG_OPERATOR_CODER(kARM32, kNumberTypeFloat16, PrimitiveType_Activation, CPUOpCoderCreator<ActivationFP16Coder>)
423+REG_OPERATOR_CODER(kARM64, kNumberTypeFloat16, PrimitiveType_Activation, CPUOpCoderCreator<ActivationFP16Coder>)
424+}  // namespace mindspore::lite::micro::nnacl
425diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/activation_fp16_coder.h b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/activation_fp16_coder.h
426new file mode 100644
427index 00000000..0390991f
428--- /dev/null
429+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/activation_fp16_coder.h
430@@ -0,0 +1,37 @@
431+/**
432+ * Copyright 2023 Huawei Technologies Co., Ltd
433+ *
434+ * Licensed under the Apache License, Version 2.0 (the "License");
435+ * you may not use this file except in compliance with the License.
436+ * You may obtain a copy of the License at
437+ *
438+ * http://www.apache.org/licenses/LICENSE-2.0
439+ *
440+ * Unless required by applicable law or agreed to in writing, software
441+ * distributed under the License is distributed on an "AS IS" BASIS,
442+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
443+ * See the License for the specific language governing permissions and
444+ * limitations under the License.
445+ */
446+
447+#ifndef MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_ACTIVATION_FP16_CODER_H_
448+#define MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_ACTIVATION_FP16_CODER_H_
449+
450+#include <vector>
451+#include "coder/opcoders/nnacl/fp32/activation_fp32_coder.h"
452+
453+namespace mindspore::lite::micro::nnacl {
454+class ActivationFP16Coder final : public ActivationFP32Coder {
455+ public:
456+  ActivationFP16Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
457+                      const LiteGraph::Node *node, size_t node_index, Target target)
458+      : ActivationFP32Coder(in_tensors, out_tensors, node, node_index, target) {}
459+
460+  ~ActivationFP16Coder() override = default;
461+
462+  int Prepare(CoderContext *const context) override;
463+
464+  int DoCode(CoderContext *const context) override;
465+};
466+}  // namespace mindspore::lite::micro::nnacl
467+#endif  // MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_ACTIVATION_FP16_CODER_H_
468diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/arithmetic_fp16_coder.cc b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/arithmetic_fp16_coder.cc
469new file mode 100644
470index 00000000..a9cdde78
471--- /dev/null
472+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/arithmetic_fp16_coder.cc
473@@ -0,0 +1,175 @@
474+/**
475+ * Copyright 2023 Huawei Technologies Co., Ltd
476+ *
477+ * Licensed under the Apache License, Version 2.0 (the "License");
478+ * you may not use this file except in compliance with the License.
479+ * You may obtain a copy of the License at
480+ *
481+ * http://www.apache.org/licenses/LICENSE-2.0
482+ *
483+ * Unless required by applicable law or agreed to in writing, software
484+ * distributed under the License is distributed on an "AS IS" BASIS,
485+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
486+ * See the License for the specific language governing permissions and
487+ * limitations under the License.
488+ */
489+#include "coder/opcoders/nnacl/fp16/arithmetic_fp16_coder.h"
490+#include "coder/opcoders/file_collector.h"
491+#include "coder/opcoders/parallel.h"
492+#include "coder/log.h"
493+#include "nnacl/broadcast_to_parameter.h"
494+
495+namespace mindspore::lite::micro::nnacl {
496+void ArithmeticFP16Coder::InitFunTable() {
497+  fun_table_ = {
498+    {PrimitiveType_MulFusion, schema::ActivationType_RELU, "ElementMulReluFp16", "", "", "", ""},
499+    {PrimitiveType_MulFusion, schema::ActivationType_RELU6, "ElementMulRelu6Fp16", "", "", "", ""},
500+    {PrimitiveType_MulFusion, schema::ActivationType_NO_ACTIVATION, "ElementMulFp16", "", "", "", ""},
501+    {PrimitiveType_AddFusion, schema::ActivationType_RELU, "ElementAddReluFp16", "", "", "", ""},
502+    {PrimitiveType_AddFusion, schema::ActivationType_RELU6, "ElementAddRelu6Fp16", "", "", "", ""},
503+    {PrimitiveType_AddFusion, schema::ActivationType_NO_ACTIVATION, "ElementAddFp16", "", "", "", ""},
504+    {PrimitiveType_SubFusion, schema::ActivationType_RELU, "ElementSubReluFp16", "", "", "", ""},
505+    {PrimitiveType_SubFusion, schema::ActivationType_RELU6, "ElementSubRelu6Fp16", "", "", "", ""},
506+    {PrimitiveType_SubFusion, schema::ActivationType_NO_ACTIVATION, "ElementSubFp16", "", "", "", ""},
507+    {PrimitiveType_DivFusion, schema::ActivationType_RELU, "ElementDivReluFp16", "", "", "", ""},
508+    {PrimitiveType_DivFusion, schema::ActivationType_RELU6, "ElementDivRelu6Fp16", "", "", "", ""},
509+    {PrimitiveType_DivFusion, schema::ActivationType_NO_ACTIVATION, "ElementDivFp16", "", "", "", ""},
510+    {PrimitiveType_RealDiv, schema::ActivationType_RELU, "ElementDivReluFp16", "", "", "", ""},
511+    {PrimitiveType_RealDiv, schema::ActivationType_RELU6, "ElementDivRelu6Fp16", "", "", "", ""},
512+    {PrimitiveType_RealDiv, schema::ActivationType_NO_ACTIVATION, "ElementDivFp16", "", "", "", ""},
513+    {PrimitiveType_LogicalAnd, schema::ActivationType_NO_ACTIVATION, "ElementLogicalAndFp16", "", "", "", ""},
514+    {PrimitiveType_LogicalOr, schema::ActivationType_NO_ACTIVATION, "ElementLogicalOrFp16", "", "", "", ""},
515+    {PrimitiveType_Maximum, schema::ActivationType_NO_ACTIVATION, "ElementMaximumFp16", "", "", "", ""},
516+    {PrimitiveType_Minimum, schema::ActivationType_NO_ACTIVATION, "ElementMinimumFp16", "", "", "", ""},
517+    {PrimitiveType_FloorMod, schema::ActivationType_NO_ACTIVATION, "ElementFloorModFp16", "", "", "", ""},
518+    {PrimitiveType_FloorDiv, schema::ActivationType_NO_ACTIVATION, "ElementFloorDivFp16", "", "", "", ""},
519+    {PrimitiveType_SquaredDifference, schema::ActivationType_NO_ACTIVATION, "ElementSquaredDifferenceFp16", "", "", "",
520+     ""}};
521+}
522+
523+int ArithmeticFP16Coder::Prepare(CoderContext *const context) {
524+  if (input_tensor_->data_type() != kNumberTypeFloat16 ||
525+      input_tensors_.at(kWeightIndex)->data_type() != kNumberTypeFloat16 ||
526+      output_tensor_->data_type() != kNumberTypeFloat16) {
527+    MS_LOG(ERROR) << "Tensor data type is invalid";
528+    return lite::RET_INPUT_PARAM_INVALID;
529+  }
530+  return ArithmeticFP32Coder::Prepare(context);
531+}
532+
533+int ArithmeticFP16Coder::ReSize(CoderContext *const context) {
534+  CalcMultiplesAndStrides(arithmetic_parameter_);
535+  return RET_OK;
536+}
537+
538+int ArithmeticFP16Coder::ExecuteCode(const std::string &input0, const std::string &input1, const std::string &output,
539+                                     int size, CoderContext *const context, NNaclFp32Serializer *const code) {
540+  if (arithmetic_func_str_.empty()) {
541+    return RET_ERROR;
542+  }
543+  for (size_t i = 0; i < fun_table_.size(); i++) {
544+    if (fun_table_[i].primitive_type_ == arithmetic_parameter_->op_parameter_.type_ &&
545+        fun_table_[i].activation_type_ == arithmetic_parameter_->activation_type_) {
546+      code->CodeFunction(fun_table_[i].func_, input0, input1, output, size);
547+      break;
548+    }
549+  }
550+  context->AppendCode(code->str());
551+  return RET_OK;
552+}
553+
554+int ArithmeticFP16Coder::DoCode(CoderContext *const context) {
555+  int element_num = output_tensor_->ElementsNum();
556+  input0_ptr_str_ = allocator_->GetRuntimeAddr(input_tensor_, input_tensor_->IsConst());
557+  input1_ptr_str_ = allocator_->GetRuntimeAddr(filter_tensor_, filter_tensor_->IsConst());
558+  output_ptr_str_ = allocator_->GetRuntimeAddr(output_tensor_);
559+  NNaclFp32Serializer code;
560+  Collect(context,
561+          {
562+            "nnacl/fp16/arithmetic_fp16.h",
563+            "nnacl/base/broadcast_to.h",
564+          },
565+          {
566+            "arithmetic_fp16.c",
567+            "arithmetic_base.c",
568+            "broadcast_to.c",
569+          });
570+
571+  // Select the eltwise function; any input whose shape differs from the output shape is broadcast first.
572+  ChooseArithmeticFunc(false);
573+  auto in0_shape = input_tensor_->shape();
574+  auto in1_shape = filter_tensor_->shape();
575+  auto out_shape = output_tensor_->shape();
576+  BroadcastShapeInfo broadcast_info;
577+  auto ret = memset_s(&broadcast_info, sizeof(BroadcastShapeInfo), 0, sizeof(BroadcastShapeInfo));
578+  MS_CHECK_TRUE_MSG(ret == EOK, RET_ERROR, "memset failed.");
579+  ret = memcpy_s(broadcast_info.output_shape_, MAX_SHAPE_SIZE * sizeof(int), out_shape.data(),
580+                 out_shape.size() * sizeof(int));
581+  MS_CHECK_TRUE_MSG(ret == EOK, RET_ERROR, "memcpy output-info failed.");
582+  broadcast_info.output_shape_size_ = static_cast<int>(out_shape.size());
583+  if (in0_shape != out_shape) {
584+    ret = memcpy_s(broadcast_info.input_shape_, MAX_SHAPE_SIZE * sizeof(int), in0_shape.data(),
585+                   in0_shape.size() * sizeof(int));
586+    MS_CHECK_TRUE_MSG(ret == EOK, RET_ERROR, "memcpy in0-info failed.");
587+    broadcast_info.input_shape_size_ = static_cast<int>(in0_shape.size());
588+    code.CodeStruct("in0_broadcast_info", broadcast_info);
589+    code.CodeFunction("BroadcastToSize16", input0_ptr_str_, "&in0_broadcast_info", output_ptr_str_);
590+    input0_ptr_str_ = output_ptr_str_;
591+  }
592+  if (in1_shape != out_shape) {
593+    ret = memcpy_s(broadcast_info.input_shape_, MAX_SHAPE_SIZE * sizeof(int), in1_shape.data(),
594+                   in1_shape.size() * sizeof(int));
595+    MS_CHECK_TRUE_MSG(ret == EOK, RET_ERROR, "memcpy in1-info failed.");
596+    broadcast_info.input_shape_size_ = static_cast<int>(in1_shape.size());
597+    code.CodeStruct("in1_broadcast_info", broadcast_info);
598+    auto temp = output_ptr_str_;
599+    if (input0_ptr_str_ == output_ptr_str_) {
600+      auto temp_data = allocator_->Malloc(kNumberTypeFloat16, output_tensor_->Size(), kWorkspace);
601+      MS_CHECK_TRUE_MSG(temp_data != nullptr, RET_NULL_PTR, "malloc running buffer failed.");
602+      temp = allocator_->GetRuntimeAddr(temp_data);
603+    }
604+    code.CodeFunction("BroadcastToSize16", input1_ptr_str_, "&in1_broadcast_info", temp);
605+    input1_ptr_str_ = temp;
606+  }
607+  return ExecuteCode(input0_ptr_str_, input1_ptr_str_, output_ptr_str_, element_num, context, &code);
608+}
609+
610+REG_OPERATOR_CODER(kARM32, kNumberTypeFloat16, PrimitiveType_AddFusion, CPUOpCoderCreator<ArithmeticFP16Coder>)
611+REG_OPERATOR_CODER(kARM32, kNumberTypeFloat16, PrimitiveType_MulFusion, CPUOpCoderCreator<ArithmeticFP16Coder>)
612+REG_OPERATOR_CODER(kARM32, kNumberTypeFloat16, PrimitiveType_SubFusion, CPUOpCoderCreator<ArithmeticFP16Coder>)
613+REG_OPERATOR_CODER(kARM32, kNumberTypeFloat16, PrimitiveType_DivFusion, CPUOpCoderCreator<ArithmeticFP16Coder>)
614+REG_OPERATOR_CODER(kARM32, kNumberTypeFloat16, PrimitiveType_RealDiv, CPUOpCoderCreator<ArithmeticFP16Coder>)
615+REG_OPERATOR_CODER(kARM32, kNumberTypeFloat16, PrimitiveType_LogicalAnd, CPUOpCoderCreator<ArithmeticFP16Coder>)
616+REG_OPERATOR_CODER(kARM32, kNumberTypeFloat16, PrimitiveType_LogicalOr, CPUOpCoderCreator<ArithmeticFP16Coder>)
617+REG_OPERATOR_CODER(kARM32, kNumberTypeFloat16, PrimitiveType_Maximum, CPUOpCoderCreator<ArithmeticFP16Coder>)
618+REG_OPERATOR_CODER(kARM32, kNumberTypeFloat16, PrimitiveType_Minimum, CPUOpCoderCreator<ArithmeticFP16Coder>)
619+REG_OPERATOR_CODER(kARM32, kNumberTypeFloat16, PrimitiveType_FloorDiv, CPUOpCoderCreator<ArithmeticFP16Coder>)
620+REG_OPERATOR_CODER(kARM32, kNumberTypeFloat16, PrimitiveType_FloorMod, CPUOpCoderCreator<ArithmeticFP16Coder>)
621+REG_OPERATOR_CODER(kARM32, kNumberTypeFloat16, PrimitiveType_SquaredDifference, CPUOpCoderCreator<ArithmeticFP16Coder>)
622+REG_OPERATOR_CODER(kARM32, kNumberTypeFloat16, PrimitiveType_Equal, CPUOpCoderCreator<ArithmeticFP16Coder>)
623+REG_OPERATOR_CODER(kARM32, kNumberTypeFloat16, PrimitiveType_NotEqual, CPUOpCoderCreator<ArithmeticFP16Coder>)
624+REG_OPERATOR_CODER(kARM32, kNumberTypeFloat16, PrimitiveType_Less, CPUOpCoderCreator<ArithmeticFP16Coder>)
625+REG_OPERATOR_CODER(kARM32, kNumberTypeFloat16, PrimitiveType_LessEqual, CPUOpCoderCreator<ArithmeticFP16Coder>)
626+REG_OPERATOR_CODER(kARM32, kNumberTypeFloat16, PrimitiveType_Greater, CPUOpCoderCreator<ArithmeticFP16Coder>)
627+REG_OPERATOR_CODER(kARM32, kNumberTypeFloat16, PrimitiveType_GreaterEqual, CPUOpCoderCreator<ArithmeticFP16Coder>)
628+REG_OPERATOR_CODER(kARM32, kNumberTypeFloat16, PrimitiveType_Eltwise, CPUOpCoderCreator<ArithmeticFP16Coder>)
629+REG_OPERATOR_CODER(kARM64, kNumberTypeFloat16, PrimitiveType_AddFusion, CPUOpCoderCreator<ArithmeticFP16Coder>)
630+REG_OPERATOR_CODER(kARM64, kNumberTypeFloat16, PrimitiveType_MulFusion, CPUOpCoderCreator<ArithmeticFP16Coder>)
631+REG_OPERATOR_CODER(kARM64, kNumberTypeFloat16, PrimitiveType_SubFusion, CPUOpCoderCreator<ArithmeticFP16Coder>)
632+REG_OPERATOR_CODER(kARM64, kNumberTypeFloat16, PrimitiveType_DivFusion, CPUOpCoderCreator<ArithmeticFP16Coder>)
633+REG_OPERATOR_CODER(kARM64, kNumberTypeFloat16, PrimitiveType_RealDiv, CPUOpCoderCreator<ArithmeticFP16Coder>)
634+REG_OPERATOR_CODER(kARM64, kNumberTypeFloat16, PrimitiveType_LogicalAnd, CPUOpCoderCreator<ArithmeticFP16Coder>)
635+REG_OPERATOR_CODER(kARM64, kNumberTypeFloat16, PrimitiveType_LogicalOr, CPUOpCoderCreator<ArithmeticFP16Coder>)
636+REG_OPERATOR_CODER(kARM64, kNumberTypeFloat16, PrimitiveType_Maximum, CPUOpCoderCreator<ArithmeticFP16Coder>)
637+REG_OPERATOR_CODER(kARM64, kNumberTypeFloat16, PrimitiveType_Minimum, CPUOpCoderCreator<ArithmeticFP16Coder>)
638+REG_OPERATOR_CODER(kARM64, kNumberTypeFloat16, PrimitiveType_FloorDiv, CPUOpCoderCreator<ArithmeticFP16Coder>)
639+REG_OPERATOR_CODER(kARM64, kNumberTypeFloat16, PrimitiveType_FloorMod, CPUOpCoderCreator<ArithmeticFP16Coder>)
640+REG_OPERATOR_CODER(kARM64, kNumberTypeFloat16, PrimitiveType_SquaredDifference, CPUOpCoderCreator<ArithmeticFP16Coder>)
641+REG_OPERATOR_CODER(kARM64, kNumberTypeFloat16, PrimitiveType_Equal, CPUOpCoderCreator<ArithmeticFP16Coder>)
642+REG_OPERATOR_CODER(kARM64, kNumberTypeFloat16, PrimitiveType_NotEqual, CPUOpCoderCreator<ArithmeticFP16Coder>)
643+REG_OPERATOR_CODER(kARM64, kNumberTypeFloat16, PrimitiveType_Less, CPUOpCoderCreator<ArithmeticFP16Coder>)
644+REG_OPERATOR_CODER(kARM64, kNumberTypeFloat16, PrimitiveType_LessEqual, CPUOpCoderCreator<ArithmeticFP16Coder>)
645+REG_OPERATOR_CODER(kARM64, kNumberTypeFloat16, PrimitiveType_Greater, CPUOpCoderCreator<ArithmeticFP16Coder>)
646+REG_OPERATOR_CODER(kARM64, kNumberTypeFloat16, PrimitiveType_GreaterEqual, CPUOpCoderCreator<ArithmeticFP16Coder>)
647+REG_OPERATOR_CODER(kARM64, kNumberTypeFloat16, PrimitiveType_Eltwise, CPUOpCoderCreator<ArithmeticFP16Coder>)
648+}  // namespace mindspore::lite::micro::nnacl
649diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/arithmetic_fp16_coder.h b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/arithmetic_fp16_coder.h
650new file mode 100644
651index 00000000..60a83419
652--- /dev/null
653+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/arithmetic_fp16_coder.h
654@@ -0,0 +1,47 @@
655+/**
656+ * Copyright 2023 Huawei Technologies Co., Ltd
657+ *
658+ * Licensed under the Apache License, Version 2.0 (the "License");
659+ * you may not use this file except in compliance with the License.
660+ * You may obtain a copy of the License at
661+ *
662+ * http://www.apache.org/licenses/LICENSE-2.0
663+ *
664+ * Unless required by applicable law or agreed to in writing, software
665+ * distributed under the License is distributed on an "AS IS" BASIS,
666+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
667+ * See the License for the specific language governing permissions and
668+ * limitations under the License.
669+ */
670+
671+#ifndef MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_ARITHMETIC_FP16_CODER_H_
672+#define MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_ARITHMETIC_FP16_CODER_H_
673+
674+#include <vector>
675+#include <string>
676+#include "coder/opcoders/nnacl/fp32/arithmetic_fp32_coder.h"
677+#include "nnacl/base/cast_base.h"
678+#include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"
679+namespace mindspore::lite::micro::nnacl {
680+class ArithmeticFP16Coder final : public ArithmeticFP32Coder {
681+ public:
682+  ArithmeticFP16Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
683+                      const LiteGraph::Node *node, size_t node_index, Target target)
684+      : ArithmeticFP32Coder(in_tensors, out_tensors, node, node_index, target) {}
685+
686+  ~ArithmeticFP16Coder() override = default;
687+
688+  int DoCode(CoderContext *const context) override;
689+
690+ private:
691+  int Prepare(CoderContext *const context) override;
692+
693+  int ReSize(CoderContext *const context) override;
694+
695+  void InitFunTable() override;
696+
697+  int ExecuteCode(const std::string &input0, const std::string &input1, const std::string &output, int size,
698+                  CoderContext *const context, NNaclFp32Serializer *const code);
699+};
700+}  // namespace mindspore::lite::micro::nnacl
701+#endif  // MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_ARITHMETIC_FP16_CODER_H_
702diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/avg_pooling_fp16_coder.cc b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/avg_pooling_fp16_coder.cc
703new file mode 100644
704index 00000000..97ca75a8
705--- /dev/null
706+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/avg_pooling_fp16_coder.cc
707@@ -0,0 +1,87 @@
708+/**
709+ * Copyright 2023 Huawei Technologies Co., Ltd
710+ *
711+ * Licensed under the Apache License, Version 2.0 (the "License");
712+ * you may not use this file except in compliance with the License.
713+ * You may obtain a copy of the License at
714+ *
715+ * http://www.apache.org/licenses/LICENSE-2.0
716+ *
717+ * Unless required by applicable law or agreed to in writing, software
718+ * distributed under the License is distributed on an "AS IS" BASIS,
719+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
720+ * See the License for the specific language governing permissions and
721+ * limitations under the License.
722+ */
723+#include "coder/opcoders/nnacl/fp16/avg_pooling_fp16_coder.h"
724+#include <cfloat>
725+#include <string>
726+#include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"
727+#include "coder/log.h"
728+#include "coder/opcoders/parallel.h"
729+#include "coder/opcoders/file_collector.h"
730+
731+using mindspore::schema::PrimitiveType_AvgPoolFusion;
732+
733+namespace mindspore::lite::micro::nnacl {
734+int PoolingFP16Coder::Prepare(CoderContext *const context) {
735+  if (input_tensor_->data_type() != kNumberTypeFloat16) {
736+    MS_LOG(ERROR) << "Input tensor data type is invalid";
737+    return RET_INPUT_PARAM_INVALID;
738+  }
739+  return RET_OK;
740+}
741+
742+int PoolingFP16Coder::DoCode(CoderContext *const context) {
743+  // Fill the PoolingParameter attributes from the input/output tensor shapes.
744+  auto pooling_parameter = reinterpret_cast<PoolingParameter *>(parameter_);
745+  MS_CHECK_PTR(pooling_parameter);
746+  // init struct PoolingParameters
747+  pooling_parameter->input_batch_ = input_tensor_->Batch();
748+  pooling_parameter->input_channel_ = input_tensor_->Channel();
749+  pooling_parameter->input_h_ = input_tensor_->Height();
750+  pooling_parameter->input_w_ = input_tensor_->Width();
751+  pooling_parameter->output_batch_ = output_tensor_->Batch();
752+  pooling_parameter->output_channel_ = output_tensor_->Channel();
753+  pooling_parameter->output_h_ = output_tensor_->Height();
754+  pooling_parameter->output_w_ = output_tensor_->Width();
755+
756+  pooling_parameter->thread_num_ = pooling_parameter->op_parameter_.thread_num_;
757+
758+  NNaclFp32Serializer code;
759+  std::string param_name = "pooling_parameter";
760+  code.CodeStruct(param_name, *pooling_parameter);
761+  float minf = -FLT16_MAX;
762+  float maxf = FLT16_MAX;
763+  Collect(context,
764+          {
765+            "nnacl/fp16/pooling_fp16.h",
766+          },
767+          {
768+            "pooling_fp16.c",
769+          });
770+  switch (pooling_parameter->act_type_) {
771+    case ActType_Relu: {
772+      minf = 0.f;
773+      break;
774+    }
775+    case ActType_Relu6: {
776+      minf = 0.f;
777+      maxf = 6.f;
778+      break;
779+    }
780+    default: {
781+      MS_LOG(INFO) << "no actype";
782+      break;
783+    }
784+  }
785+  code.CodeFunction("AvgPoolingFp16", input_tensor_, output_tensor_, "&pooling_parameter", kDefaultTaskId, minf, maxf);
786+
787+  MS_LOG(INFO) << "PoolingFp16Code has been called";
788+  context->AppendCode(code.str());
789+  return lite::RET_OK;
790+}
791+
792+REG_OPERATOR_CODER(kARM32, kNumberTypeFloat16, PrimitiveType_AvgPoolFusion, CPUOpCoderCreator<PoolingFP16Coder>)
793+REG_OPERATOR_CODER(kARM64, kNumberTypeFloat16, PrimitiveType_AvgPoolFusion, CPUOpCoderCreator<PoolingFP16Coder>)
794+}  // namespace mindspore::lite::micro::nnacl
795diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/avg_pooling_fp16_coder.h b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/avg_pooling_fp16_coder.h
796new file mode 100644
797index 00000000..65a6522d
798--- /dev/null
799+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/avg_pooling_fp16_coder.h
800@@ -0,0 +1,36 @@
801+/**
802+ * Copyright 2023 Huawei Technologies Co., Ltd
803+ *
804+ * Licensed under the Apache License, Version 2.0 (the "License");
805+ * you may not use this file except in compliance with the License.
806+ * You may obtain a copy of the License at
807+ *
808+ * http://www.apache.org/licenses/LICENSE-2.0
809+ *
810+ * Unless required by applicable law or agreed to in writing, software
811+ * distributed under the License is distributed on an "AS IS" BASIS,
812+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
813+ * See the License for the specific language governing permissions and
814+ * limitations under the License.
815+ */
816+
817+#ifndef MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_POOLING_FP16_CODER_H_
818+#define MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_POOLING_FP16_CODER_H_
819+
820+#include <vector>
821+#include "coder/opcoders/nnacl/fp32/pooling_fp32_coder.h"
822+
823+namespace mindspore::lite::micro::nnacl {
824+class PoolingFP16Coder final : public PoolingFP32Coder {
825+ public:
826+  PoolingFP16Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
827+                   const LiteGraph::Node *node, size_t node_index, Target target)
828+      : PoolingFP32Coder(in_tensors, out_tensors, node, node_index, target) {}
829+  ~PoolingFP16Coder() override = default;
830+
831+  int Prepare(CoderContext *const context) override;
832+
833+  int DoCode(CoderContext *const context) override;
834+};
835+}  // namespace mindspore::lite::micro::nnacl
836+#endif  // MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_POOLING_FP16_CODER_H_
837diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/concat_fp16_coder.cc b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/concat_fp16_coder.cc
838new file mode 100644
839index 00000000..fd969963
840--- /dev/null
841+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/concat_fp16_coder.cc
842@@ -0,0 +1,88 @@
843+/**
844+ * Copyright 2023 Huawei Technologies Co., Ltd
845+ *
846+ * Licensed under the Apache License, Version 2.0 (the "License");
847+ * you may not use this file except in compliance with the License.
848+ * You may obtain a copy of the License at
849+ *
850+ * http://www.apache.org/licenses/LICENSE-2.0
851+ *
852+ * Unless required by applicable law or agreed to in writing, software
853+ * distributed under the License is distributed on an "AS IS" BASIS,
854+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
855+ * See the License for the specific language governing permissions and
856+ * limitations under the License.
857+ */
858+#include "coder/opcoders/nnacl/fp16/concat_fp16_coder.h"
859+#include <string>
860+#include <vector>
861+#include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"
862+#include "coder/opcoders/file_collector.h"
863+#include "coder/opcoders/parallel.h"
864+
865+using mindspore::schema::PrimitiveType_Concat;
866+
867+namespace mindspore::lite::micro::nnacl {
868+int ConcatFP16Coder::Prepare(CoderContext *const context) {
869+  if (input_tensor_->data_type() != kNumberTypeFloat16) {
870+    MS_LOG(ERROR) << "Input tensor data type is invalid";
871+    return lite::RET_INPUT_PARAM_INVALID;
872+  }
873+  concat_param_ = reinterpret_cast<ConcatParameter *>(parameter_);
874+  MS_CHECK_PTR(concat_param_);
875+  return ReSize();
876+}
877+
878+int ConcatFP16Coder::ReSize() {
879+  axis_ = concat_param_->axis_ >= 0 ? concat_param_->axis_
880+                                    : static_cast<int>(input_tensor_->shape().size()) + concat_param_->axis_;
881+  return RET_OK;
882+}
883+
884+int ConcatFP16Coder::DoCode(CoderContext *const context) {
885+  Collect(context,
886+          {
887+            "nnacl/base/concat_base.h",
888+          },
889+          {
890+            "concat_base.c",
891+          });
892+
893+  size_t input_num = input_tensors_.size();
894+
895+  NNaclFp32Serializer code;
896+  code << "\t\tvoid *inputs_addr[] = {";
897+  for (size_t i = 0; i < input_num; ++i) {
898+    code << allocator_->GetRuntimeAddr(input_tensors_.at(i)) << ", ";
899+  }
900+  code << "};\n";
901+
902+  size_t i;
903+  for (i = 0; i < input_num; ++i) {
904+    code << "\t\tint shape_" << i << "[] = {";
905+    for (auto &shape : input_tensors_.at(i)->shape()) {
906+      code << shape << ", ";
907+    }
908+    code << "};\n";
909+  }
910+
911+  code << "\t\tint shape_" << i << "[] = {";
912+  for (auto &shape : output_tensor_->shape()) {
913+    code << shape << ", ";
914+  }
915+  code << "};\n";
916+
917+  code << "\t\tint *inputs_output_shape[] = {";
918+  for (i = 0; i <= input_num; ++i) {
919+    code << "shape_" << i << ", ";
920+  }
921+  code << "};\n";
922+  code.CodeFunction("Concat", "inputs_addr", input_num, axis_, "inputs_output_shape", output_tensor_->shape().size(),
923+                    output_tensor_, 0, 1, sizeof(uint16_t));
924+  context->AppendCode(code.str());
925+  return RET_OK;
926+}
927+
928+REG_OPERATOR_CODER(kARM32, kNumberTypeFloat16, PrimitiveType_Concat, CPUOpCoderCreator<ConcatFP16Coder>)
929+REG_OPERATOR_CODER(kARM64, kNumberTypeFloat16, PrimitiveType_Concat, CPUOpCoderCreator<ConcatFP16Coder>)
930+}  // namespace mindspore::lite::micro::nnacl
931diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/concat_fp16_coder.h b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/concat_fp16_coder.h
932new file mode 100644
933index 00000000..6428ac6f
934--- /dev/null
935+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/concat_fp16_coder.h
936@@ -0,0 +1,42 @@
937+/**
938+ * Copyright 2023 Huawei Technologies Co., Ltd
939+ *
940+ * Licensed under the Apache License, Version 2.0 (the "License");
941+ * you may not use this file except in compliance with the License.
942+ * You may obtain a copy of the License at
943+ *
944+ * http://www.apache.org/licenses/LICENSE-2.0
945+ *
946+ * Unless required by applicable law or agreed to in writing, software
947+ * distributed under the License is distributed on an "AS IS" BASIS,
948+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
949+ * See the License for the specific language governing permissions and
950+ * limitations under the License.
951+ */
952+
953+#ifndef MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_CONCAT_FP16_CODER_H_
954+#define MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_CONCAT_FP16_CODER_H_
955+
956+#include <vector>
957+#include "coder/opcoders/nnacl/fp32/concat_fp32_coder.h"
958+#include "nnacl/concat_parameter.h"
959+
960+namespace mindspore::lite::micro::nnacl {
961+class ConcatFP16Coder final : public ConcatFP32Coder {
962+ public:
963+  ConcatFP16Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
964+                  const LiteGraph::Node *node, size_t node_index, Target target)
965+      : ConcatFP32Coder(in_tensors, out_tensors, node, node_index, target) {}
966+  ~ConcatFP16Coder() override = default;
967+
968+  int Prepare(CoderContext *const context) override;
969+  int DoCode(CoderContext *const context) override;
970+
971+ private:
972+  int ReSize();
973+
974+  int axis_{0};
975+  ConcatParameter *concat_param_{nullptr};
976+};
977+}  // namespace mindspore::lite::micro::nnacl
978+#endif  // MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_CONCAT_FP16_CODER_H_
979diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/layernorm_fp16_coder.cc b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/layernorm_fp16_coder.cc
980new file mode 100644
981index 00000000..8140786b
982--- /dev/null
983+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/layernorm_fp16_coder.cc
984@@ -0,0 +1,56 @@
985+/**
986+ * Copyright 2023 Huawei Technologies Co., Ltd
987+ *
988+ * Licensed under the Apache License, Version 2.0 (the "License");
989+ * you may not use this file except in compliance with the License.
990+ * You may obtain a copy of the License at
991+ *
992+ * http://www.apache.org/licenses/LICENSE-2.0
993+ *
994+ * Unless required by applicable law or agreed to in writing, software
995+ * distributed under the License is distributed on an "AS IS" BASIS,
996+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
997+ * See the License for the specific language governing permissions and
998+ * limitations under the License.
999+ */
1000+#include "coder/opcoders/nnacl/fp16/layernorm_fp16_coder.h"
1001+#include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"
1002+#include "coder/opcoders/file_collector.h"
1003+#include "coder/opcoders/parallel.h"
1004+
1005+using mindspore::schema::PrimitiveType_LayerNormFusion;
1006+
1007+namespace mindspore::lite::micro::nnacl {
1008+int LayerNormFP16Coder::Prepare(CoderContext *const context) {
1009+  if ((input_tensor_->data_type() != kNumberTypeFloat16) ||
1010+      (input_tensors_.at(SECOND_INPUT)->data_type() != kNumberTypeFloat16) ||
1011+      (input_tensors_.at(THIRD_INPUT)->data_type() != kNumberTypeFloat16)) {
1012+    MS_LOG(ERROR) << "Input tensors data type is invalid";
1013+    return RET_INPUT_PARAM_INVALID;
1014+  }
1015+  return LayerNormFP32Coder::Prepare(context);
1016+}
1017+
1018+int LayerNormFP16Coder::DoCode(CoderContext *const context) {
1019+  NNaclFp32Serializer code;
1020+  code.CodeStruct("layer_norm_parm", *param_);
1021+  Collect(context, {"nnacl/fp16/layer_norm_fp16.h"}, {"layer_norm_fp16.c"});
1022+
1023+  if (output_tensors_.size() == C3NUM) {
1024+    code.CodeFunction("LayerNormFp16", input_tensor_, input_tensors_.at(SECOND_INPUT), input_tensors_.at(THIRD_INPUT),
1025+                      output_tensor_, output_tensors_.at(SECOND_INPUT), output_tensors_.at(THIRD_INPUT),
1026+                      "&layer_norm_parm", 0);
1027+  } else if (output_tensors_.size() == 1) {
1028+    code.CodeFunction("LayerNormFp16", input_tensor_, input_tensors_.at(SECOND_INPUT), input_tensors_.at(THIRD_INPUT),
1029+                      output_tensor_, "NULL", "NULL", "&layer_norm_parm", 0);
1030+  } else {
1031+    MS_LOG(ERROR) << "LayerNorm should have 1 or 3 output tensors";
1032+    return RET_ERROR;
1033+  }
1034+  context->AppendCode(code.str());
1035+  return RET_OK;
1036+}
1037+
1038+REG_OPERATOR_CODER(kARM32, kNumberTypeFloat16, PrimitiveType_LayerNormFusion, CPUOpCoderCreator<LayerNormFP16Coder>)
1039+REG_OPERATOR_CODER(kARM64, kNumberTypeFloat16, PrimitiveType_LayerNormFusion, CPUOpCoderCreator<LayerNormFP16Coder>)
1040+}  // namespace mindspore::lite::micro::nnacl
1041diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/layernorm_fp16_coder.h b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/layernorm_fp16_coder.h
1042new file mode 100644
1043index 00000000..df025e3c
1044--- /dev/null
1045+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/layernorm_fp16_coder.h
1046@@ -0,0 +1,37 @@
1047+/**
1048+ * Copyright 2023 Huawei Technologies Co., Ltd
1049+ *
1050+ * Licensed under the Apache License, Version 2.0 (the "License");
1051+ * you may not use this file except in compliance with the License.
1052+ * You may obtain a copy of the License at
1053+ *
1054+ * http://www.apache.org/licenses/LICENSE-2.0
1055+ *
1056+ * Unless required by applicable law or agreed to in writing, software
1057+ * distributed under the License is distributed on an "AS IS" BASIS,
1058+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1059+ * See the License for the specific language governing permissions and
1060+ * limitations under the License.
1061+ */
1062+
1063+#ifndef MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_LAYERNORM_FP16_CODER_H_
1064+#define MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_LAYERNORM_FP16_CODER_H_
1065+
1066+#include <vector>
1067+#include "coder/opcoders/nnacl/fp32/layernorm_fp32_coder.h"
1068+#include "nnacl/layer_norm_parameter.h"
1069+
1070+namespace mindspore::lite::micro::nnacl {
1071+class LayerNormFP16Coder final : public LayerNormFP32Coder {
1072+ public:
1073+  LayerNormFP16Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
1074+                     const LiteGraph::Node *node, size_t node_index, Target target)
1075+      : LayerNormFP32Coder(in_tensors, out_tensors, node, node_index, target) {}
1076+  ~LayerNormFP16Coder() override = default;
1077+
1078+  int Prepare(CoderContext *const context) override;
1079+
1080+  int DoCode(CoderContext *const context) override;
1081+};
1082+}  // namespace mindspore::lite::micro::nnacl
1083+#endif  // MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_LAYERNORM_FP16_CODER_H_
1084diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/matmul_fp16_base_coder.cc b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/matmul_fp16_base_coder.cc
1085new file mode 100644
1086index 00000000..f2aec9d2
1087--- /dev/null
1088+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/matmul_fp16_base_coder.cc
1089@@ -0,0 +1,286 @@
1090+/**
1091+ * Copyright 2023 Huawei Technologies Co., Ltd
1092+ *
1093+ * Licensed under the Apache License, Version 2.0 (the "License");
1094+ * you may not use this file except in compliance with the License.
1095+ * You may obtain a copy of the License at
1096+ *
1097+ * http://www.apache.org/licenses/LICENSE-2.0
1098+ *
1099+ * Unless required by applicable law or agreed to in writing, software
1100+ * distributed under the License is distributed on an "AS IS" BASIS,
1101+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1102+ * See the License for the specific language governing permissions and
1103+ * limitations under the License.
1104+ */
1105+
1106+#include "coder/opcoders/nnacl/fp16/matmul_fp16_base_coder.h"
1107+#include <string>
1108+#include <vector>
1109+#include "coder/log.h"
1110+#include "coder/opcoders/parallel.h"
1111+#include "coder/opcoders/file_collector.h"
1112+#include "coder/opcoders/nnacl/dequant/de_quant.h"
1113+#include "nnacl/base/cast_base.h"
1114+
1115+using mindspore::schema::PrimitiveType_MatMulFusion;
1116+
1117+namespace mindspore::lite::micro::nnacl {
1118+int MatMulFP16BaseCoder::InitBiasData() {
1119+  if (bias_ptr_) {
1120+    return RET_OK;
1121+  }
1122+  bias_pack_ptr_size_ = static_cast<size_t>(params_->col_align_ * data_type_size_);
1123+  if (input_tensors_.size() == C3NUM) {
1124+    bias_ptr_ = allocator_->Malloc(kNumberTypeUInt8, kOnlineSize, kOnlinePackWeight,
1125+                                   bias_tensor_->tensor_name() + "_online_pack");
1126+  } else {
1127+    bias_ptr_ =
1128+      allocator_->Malloc(kNumberTypeUInt8, kOnlineSize, kOnlinePackWeight, node_->name_ + "_bias_online_pack");
1129+  }
1130+  return RET_OK;
1131+}
1132+
1133+int MatMulFP16BaseCoder::InitBufferA() {
1134+  if (a_pack_ptr_ != nullptr || vec_matmul_) {
1135+    return RET_OK;
1136+  }
1137+  a_pack_ptr_size_ = static_cast<size_t>(params_->batch * params_->row_align_ * params_->deep_ * sizeof(uint16_t));
1138+  if (params_->a_const_) {
1139+    a_pack_ptr_ = allocator_->GetSharedWeightAddr(input_tensors_.at(0));
1140+    if (a_pack_ptr_ == nullptr) {
1141+      a_pack_ptr_ = allocator_->Malloc(kNumberTypeFloat16, kOnlineSize, kOnlinePackWeight,
1142+                                       input_tensors_.at(0)->tensor_name() + "_online_pack");
1143+      allocator_->MarkSharedWeight(input_tensors_.at(0), a_pack_ptr_);
1144+    } else {
1145+      a_packed_ = true;
1146+    }
1147+  } else {
1148+    a_pack_ptr_ = allocator_->Malloc(kNumberTypeFloat16, a_pack_ptr_size_, kWorkspace);
1149+  }
1150+  MS_CHECK_PTR(a_pack_ptr_);
1151+  return RET_OK;
1152+}
1153+
1154+int MatMulFP16BaseCoder::InitBufferB() {
1155+  if (target_ != kARM64) {
1156+    if (vec_matmul_ && params_->b_transpose_) {
1157+      return RET_OK;
1158+    }
1159+  }
1160+  return MatMulFP32BaseCoder::InitBufferB();
1161+}
1162+
// Emits the code that packs matrix A into the layout the fp16 matmul kernels
// expect and returns the C expression naming the buffer the kernel should read.
// - For a vector matmul (row_ == 1) no packing is needed: the raw input address
//   is returned directly.
// - Packing statements go into *init_code (weight-initialization section) when A
//   is constant, otherwise into *code (per-inference section).
// - When A is constant, *w_buf is grown by the packed-buffer size so the caller
//   can account for the extra weight memory.
std::string MatMulFP16BaseCoder::InitMatrixA(NNaclFp32Serializer *const code, NNaclFp32Serializer *const init_code,
                                             CoderContext *const context, size_t *w_buf) {
  if (vec_matmul_) {
    // Vector matmul consumes A unpacked.
    return allocator_->GetRuntimeAddr(input_tensor_, input_tensor_->IsConst());
  }
  std::string input_a_str = allocator_->GetRuntimeAddr(input_tensor_);
  std::string input_a_pack_str = "(float16_t *)" + allocator_->GetRuntimeAddr(a_pack_ptr_);
  if (params_->a_const_) {
    // Constant A: reserve the packed buffer inside the weight arena.
    init_code->CodeBufferOffsetExpression(a_pack_ptr_, context->weight_name(), context->weight_offset_name(),
                                          context->weight_size_name(), a_pack_ptr_size_);
    *w_buf = *w_buf + a_pack_ptr_size_;
  }
  // Route the packing statements to init-time (const A) or run-time (variable A).
  NNaclFp32Serializer &pack_node = params_->a_const_ ? *init_code : *code;
  if (a_batch_ == 1) {
    // Single batch: one direct call to the pack routine.
    // ARM64 uses the N-major packers; other targets use the 12-major packers.
    if (params_->a_transpose_) {
      if (target_ == kARM64) {
        pack_node.CodeFunction("RowMajor2RowNMajorFp16", input_a_str, input_a_pack_str, params_->deep_, params_->row_);
      } else {
        pack_node.CodeFunction("RowMajor2Row12MajorFp16", input_a_str, input_a_pack_str, params_->deep_, params_->row_,
                               false);
      }
    } else {
      if (target_ == kARM64) {
        pack_node.CodeFunction("RowMajor2ColNMajorFp16", input_a_str, input_a_pack_str, params_->row_, params_->deep_);
      } else {
        pack_node.CodeFunction("RowMajor2Col12MajorFp16", input_a_str, input_a_pack_str, params_->row_, params_->deep_,
                               false);
      }
    }
  } else {
    // Multiple batches: emit a loop; src strides by deep_*row_, dst strides by
    // the row-aligned size deep_*row_align_.
    pack_node << "  for (int i = 0; i < " << a_batch_ << "; ++i) {\n"
              << "    float16_t *src = " << input_a_str << " + i * " << params_->deep_ * params_->row_ << ";\n"
              << "    float16_t *dst = " << input_a_pack_str << " + i * " << params_->deep_ * params_->row_align_
              << ";\n";
    if (params_->a_transpose_) {
      if (target_ == kARM64) {
        pack_node << "    RowMajor2RowNMajorFp16(src, dst, " << params_->deep_ << ", " << params_->row_ << ");\n";
      } else {
        pack_node << "    RowMajor2Row12MajorFp16(src, dst, " << params_->deep_ << ", " << params_->row_
                  << ", false);\n";
      }
    } else {
      if (target_ == kARM64) {
        pack_node << "    RowMajor2ColNMajorFp16(src, dst, " << params_->row_ << ", " << params_->deep_ << ");\n";
      } else {
        pack_node << "    RowMajor2Col12MajorFp16(src, dst, " << params_->row_ << ", " << params_->deep_
                  << ", false);\n";
      }
    }
    pack_node << "  }\n";
  }
  return input_a_pack_str;
}
1216+
// Emits the code that packs matrix B for the fp16 matmul kernels and returns
// the C expression the kernel should read B from.
// - On non-ARM64 targets a transposed vector matmul can consume B unpacked.
// - Packing statements go into *init_code when B is constant (the usual case
//   for weights), otherwise into *code; *w_buf is grown accordingly.
std::string MatMulFP16BaseCoder::InitMatrixB(NNaclFp32Serializer *const code, NNaclFp32Serializer *const init_code,
                                             CoderContext *const context, size_t *w_buf) {
  // MatVecMulFp16 on ARM32 reads row-major transposed B directly.
  bool no_pack = target_ != kARM64 && vec_matmul_ && params_->b_transpose_;
  if (no_pack) {
    return allocator_->GetRuntimeAddr(filter_tensor_, filter_tensor_->IsConst());
  }
  std::string input_b_str = allocator_->GetRuntimeAddr(filter_tensor_);
  std::string input_b_pack_str = "(float16_t *)" + allocator_->GetRuntimeAddr(b_pack_ptr_);
  if (params_->b_const_) {
    // Constant B: reserve the packed buffer inside the weight arena.
    init_code->CodeBufferOffsetExpression(b_pack_ptr_, context->weight_name(), context->weight_offset_name(),
                                          context->weight_size_name(), b_pack_ptr_size_);
    *w_buf = *w_buf + b_pack_ptr_size_;
  }
  // Route the packing statements to init-time (const B) or run-time (variable B).
  NNaclFp32Serializer &pack_node = params_->b_const_ ? *init_code : *code;
  if (target_ != kARM64) {
    if (vec_matmul_) {
      // Non-ARM64 vector matmul with non-transposed B: pack to plain col-major.
      // Note dst strides by deep_*col_ here (no column alignment).
      if (b_batch_ == 1) {
        pack_node.CodeFunction("RowMajor2ColMajorFp16", input_b_str, input_b_pack_str, params_->deep_, params_->col_,
                               false);
      } else {
        pack_node << "  for (int i = 0; i < " << b_batch_ << "; ++i) {\n"
                  << "    float16_t *src = " << input_b_str << " + i * " << params_->deep_ * params_->col_ << ";\n"
                  << "    float16_t *dst = " << input_b_pack_str << " + i * " << params_->deep_ * params_->col_ << ";\n"
                  << "    RowMajor2ColMajorFp16(src, dst, " << params_->deep_ << ", " << params_->col_ << ", "
                  << "false);\n"
                  << "  }\n";
      }
      return input_b_pack_str;
    }
  }

  // General case: pack B to the 8-major layout required by the matmul kernels.
  if (b_batch_ == 1) {
    if (params_->b_transpose_) {
      pack_node.CodeFunction("RowMajor2Col8MajorFp16", input_b_str, input_b_pack_str, params_->col_, params_->deep_,
                             false);
    } else {
      pack_node.CodeFunction("RowMajor2Row8MajorFp16", input_b_str, input_b_pack_str, params_->deep_, params_->col_,
                             false);
    }
  } else {
    // Batched: src strides by deep_*col_, dst strides by the column-aligned
    // size deep_*col_align_.
    pack_node << "  for (int i = 0; i < " << b_batch_ << "; ++i) {\n"
              << "    float16_t *src = " << input_b_str << " + i * " << params_->deep_ * params_->col_ << ";\n"
              << "    float16_t *dst = " << input_b_pack_str << " + i * " << params_->deep_ * params_->col_align_
              << ";\n";
    if (params_->b_transpose_) {
      pack_node << "    RowMajor2Col8MajorFp16(src, dst, " << params_->col_ << ", " << params_->deep_ << ", false);\n";
    } else {
      pack_node << "    RowMajor2Row8MajorFp16(src, dst, " << params_->deep_ << ", " << params_->col_ << ", false);\n";
    }
    pack_node << "  }\n";
  }
  return input_b_pack_str;
}
1270+
1271+int MatMulFP16BaseCoder::Prepare(CoderContext *const context) {
1272+  if (input_tensor_->data_type() != kNumberTypeFloat16 || filter_tensor_->data_type() != kNumberTypeFloat16) {
1273+    MS_LOG(INFO) << "Input tensor data type is invalid";
1274+    return RET_INPUT_PARAM_INVALID;
1275+  }
1276+  row_tile_ = C12NUM;
1277+  if (target_ == kARM64) {
1278+    row_tile_ = C4NUM;
1279+  }
1280+  auto ret = InitAShape();
1281+  MS_CHECK_TRUE_MSG(ret == RET_OK, RET_ERROR, "init A-metrics' info failed");
1282+  ret = InitBShape();
1283+  MS_CHECK_TRUE_MSG(ret == RET_OK, RET_ERROR, "init B-metrics' info failed");
1284+  if (params_->row_ == 1) {
1285+    vec_matmul_ = true;
1286+  }
1287+  if (vec_matmul_) {
1288+    params_->row_align_ = 1;
1289+    params_->col_align_ = (target_ == kARM64) ? UP_ROUND(params_->col_, C8NUM) : params_->col_;
1290+  } else {
1291+    params_->row_align_ = UP_ROUND(params_->row_, row_tile_);
1292+    params_->col_align_ = UP_ROUND(params_->col_, C8NUM);
1293+  }
1294+  MS_CHECK_RET_CODE(InitBufferA(), "InitBufferA failed");
1295+  MS_CHECK_RET_CODE(InitBufferB(), "InitBufferB failed");
1296+  MS_CHECK_RET_CODE(InitBiasData(), "InitBias failed");
1297+  return RET_OK;
1298+}
1299+
1300+int MatMulFP16BaseCoder::CollectFilesForTarget(CoderContext *const context) {
1301+  Collect(context,
1302+          {
1303+            "nnacl/fp16/pack_fp16.h",
1304+            "nnacl/fp16/matmul_fp16.h",
1305+          },
1306+          {
1307+            "pack_fp16.c",
1308+            "matmul_fp16.c",
1309+          });
1310+  if (target_ == kARM32) {
1311+    Collect(context, {}, {},
1312+            {
1313+              "Matmul12x8Fp16.S",
1314+              "MatVecMulFp16.S",
1315+            });
1316+  } else if (target_ == kARM64) {
1317+    Collect(context, {}, {},
1318+            {
1319+              "MatmulFp16.S",
1320+              "MatmulFp16Opt.S",
1321+              "MatVecMulFp16.S",
1322+              "Matmul12X16Fp16.S",
1323+              "MatmulBaseFp16Neon.S",
1324+              "MatmulWinogradFp16.S",
1325+              "VecMatmulFp16.S",
1326+            });
1327+  }
1328+  return RET_OK;
1329+}
1330+
// Generates the inference code for the fp16 matmul: bias setup in the weight
// init section, A/B packing (via InitMatrixA/InitMatrixB), then a per-batch
// loop calling the target-specific kernel.
int MatMulFP16BaseCoder::DoCode(CoderContext *const context) {
  CollectFilesForTarget(context);
  NNaclFp32Serializer code, init_code;
  size_t w_buf_size = 0;

  // do bias packing to init
  init_code.CodeBufferOffsetExpression(bias_ptr_, context->weight_name(), context->weight_offset_name(),
                                       context->weight_size_name(), bias_pack_ptr_size_);
  w_buf_size += bias_pack_ptr_size_;
  std::string bias_str = "(float16_t *)" + allocator_->GetRuntimeAddr(bias_ptr_);
  if (input_tensors_.size() == DIMENSION_3D) {
    // Third input present: copy the real bias into the packed buffer.
    auto origin_bias_str = allocator_->GetRuntimeAddr(bias_tensor_);
    init_code.CodeFunction("memcpy", bias_str, origin_bias_str, bias_tensor_->Size());
  } else {
    // No bias input: zero-fill so the kernel can add it unconditionally.
    init_code.CodeFunction("memset", bias_str, 0, bias_pack_ptr_size_);
  }

  auto input_a_str = InitMatrixA(&code, &init_code, context, &w_buf_size);
  auto input_b_str = InitMatrixB(&code, &init_code, context, &w_buf_size);
  auto output_str = allocator_->GetRuntimeAddr(output_tensor_);
  // Emit one kernel call per batch; pointers stride by the packed batch sizes.
  code << "  for (int i = 0; i < " << params_->batch << "; ++i) {\n";
  if (vec_matmul_) {
    code << "    const float16_t *batch_a_ptr = " << input_a_str << " + i * " << params_->deep_ << ";\n";
    // B stride uses the aligned column count only on ARM64 (see Prepare).
    code << "    const float16_t *batch_b_ptr = " << input_b_str << " + i * "
         << params_->deep_ * (target_ == kARM64 ? params_->col_align_ : params_->col_) << ";\n";
    // NOTE(review): the six-space indent emitted on the next line is
    // inconsistent with the surrounding four-space generated code — cosmetic
    // only, but worth confirming against the fp32 coder's output.
    code << "      float16_t *batch_c_ptr = " << output_str << " + i * " << params_->row_ * params_->col_ << ";\n  ";
    code.CodeFunction(target_ == kARM64 ? "VecMatmulFp16" : "MatVecMulFp16", "batch_a_ptr", "batch_b_ptr",
                      "batch_c_ptr", bias_str, params_->act_type_, params_->deep_, params_->col_);
  } else {
    code << "    const float16_t *batch_a_ptr = " << input_a_str << " + i * " << params_->row_align_ * params_->deep_
         << ";\n";
    code << "    const float16_t *batch_b_ptr = " << input_b_str << " + i * " << params_->deep_ * params_->col_align_
         << ";\n";
    code << "    float16_t *batch_c_ptr = " << output_str << " + i * " << params_->row_ * params_->col_ << ";\n  ";
    code.CodeFunction(target_ == kARM64 ? "MatmulBaseFp16Neon" : "MatMulFp16", "batch_a_ptr", "batch_b_ptr",
                      "batch_c_ptr", bias_str, params_->act_type_, params_->deep_, params_->row_, params_->col_,
                      params_->col_, OutType_Nhwc);
  }
  code << "  }\n";
  // Publish the total weight-arena bytes this op contributed.
  context->AppendInitWeightSizeCode(w_buf_size);
  context->AppendCode(code.str());
  context->AppendInitCode(init_code.str());
  return RET_OK;
}
1375+}  // namespace mindspore::lite::micro::nnacl
1376diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/matmul_fp16_base_coder.h b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/matmul_fp16_base_coder.h
1377new file mode 100644
1378index 00000000..864f54ae
1379--- /dev/null
1380+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/matmul_fp16_base_coder.h
1381@@ -0,0 +1,58 @@
1382+/**
1383+ * Copyright 2023 Huawei Technologies Co., Ltd
1384+ *
1385+ * Licensed under the Apache License, Version 2.0 (the "License");
1386+ * you may not use this file except in compliance with the License.
1387+ * You may obtain a copy of the License at
1388+ *
1389+ * http://www.apache.org/licenses/LICENSE-2.0
1390+ *
1391+ * Unless required by applicable law or agreed to in writing, software
1392+ * distributed under the License is distributed on an "AS IS" BASIS,
1393+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1394+ * See the License for the specific language governing permissions and
1395+ * limitations under the License.
1396+ */
1397+
1398+#ifndef MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_MATMUL_FP16_BASE_CODER_H_
1399+#define MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_MATMUL_FP16_BASE_CODER_H_
1400+
1401+#include <vector>
1402+#include "coder/opcoders/nnacl/fp32/matmul_fp32_base_coder.h"
1403+#include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"
1404+#include "nnacl/matmul_parameter.h"
1405+
1406+namespace mindspore::lite::micro::nnacl {
// Shared implementation for fp16 matmul / fully-connected op coders.
// Derives from the fp32 base coder and overrides the buffer-init and
// code-generation steps with fp16 packing layouts and kernels.
// Concrete subclasses supply the shape derivation via InitAShape/InitBShape.
class MatMulFP16BaseCoder : public MatMulFP32BaseCoder {
 public:
  MatMulFP16BaseCoder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
                      const LiteGraph::Node *node, size_t node_index, Target target)
      : MatMulFP32BaseCoder(in_tensors, out_tensors, node, node_index, target) {}

  ~MatMulFP16BaseCoder() override = default;

  // Validates dtypes, derives geometry and allocates packing buffers.
  int Prepare(CoderContext *const context) override;

  // Emits bias/pack init code and the per-batch kernel-call loop.
  int DoCode(CoderContext *const context) override;

 private:
  int InitBiasData() override;
  int InitBufferA() override;
  int InitBufferB() override;
  // Emit A-packing code; returns the expression for the kernel's A operand.
  std::string InitMatrixA(NNaclFp32Serializer *const code, NNaclFp32Serializer *const init_code,
                          CoderContext *const context, size_t *w_buf);
  // Emit B-packing code; returns the expression for the kernel's B operand.
  std::string InitMatrixB(NNaclFp32Serializer *const code, NNaclFp32Serializer *const init_code,
                          CoderContext *const context, size_t *w_buf);
  int CollectFilesForTarget(CoderContext *const context) override;

 protected:
  // Subclass hooks: fill params_ (row/deep/col/batch) from operand shapes.
  virtual int InitAShape() = 0;
  virtual int InitBShape() = 0;

 protected:
  int a_batch_ = 1;   // product of A's leading (non-matrix) dimensions
  int b_batch_ = 1;   // product of B's leading (non-matrix) dimensions
  int bias_count_ = 0;
};
1438+}  // namespace mindspore::lite::micro::nnacl
1439+#endif  // MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_MATMUL_FP16_BASE_CODER_H_
1440diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/matmul_fp16_coder.cc b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/matmul_fp16_coder.cc
1441new file mode 100644
1442index 00000000..26a3b923
1443--- /dev/null
1444+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/matmul_fp16_coder.cc
1445@@ -0,0 +1,79 @@
1446+/**
1447+ * Copyright 2023 Huawei Technologies Co., Ltd
1448+ *
1449+ * Licensed under the Apache License, Version 2.0 (the "License");
1450+ * you may not use this file except in compliance with the License.
1451+ * You may obtain a copy of the License at
1452+ *
1453+ * http://www.apache.org/licenses/LICENSE-2.0
1454+ *
1455+ * Unless required by applicable law or agreed to in writing, software
1456+ * distributed under the License is distributed on an "AS IS" BASIS,
1457+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1458+ * See the License for the specific language governing permissions and
1459+ * limitations under the License.
1460+ */
1461+
1462+#include "coder/opcoders/nnacl/fp16/matmul_fp16_coder.h"
1463+#include <vector>
1464+#include "coder/log.h"
1465+#include "coder/opcoders/file_collector.h"
1466+
1467+using mindspore::schema::PrimitiveType_MatMulFusion;
1468+
1469+namespace mindspore::lite::micro::nnacl {
1470+int MatMulFP16Coder::InitAShape() {
1471+  std::vector<int> a_shape = input_tensor_->shape();
1472+  MS_CHECK_TRUE_MSG(a_shape.size() >= DIMENSION_2D, RET_ERROR, "A-metric tensor's shape is invalid.");
1473+  int batch = 1;
1474+  for (size_t i = 0; i < a_shape.size() - DIMENSION_2D; ++i) {
1475+    batch *= a_shape.at(i);
1476+  }
1477+  a_batch_ = batch;
1478+  params_->batch = batch;
1479+  params_->row_ = params_->a_transpose_ ? a_shape[a_shape.size() - C1NUM] : a_shape[a_shape.size() - C2NUM];
1480+  params_->deep_ = params_->a_transpose_ ? a_shape[a_shape.size() - C2NUM] : a_shape[a_shape.size() - C1NUM];
1481+  params_->row_16_ = UP_ROUND(params_->row_, row_tile_);
1482+  return RET_OK;
1483+}
1484+
1485+int MatMulFP16Coder::InitBShape() {
1486+  std::vector<int> b_shape = filter_tensor_->shape();
1487+  MS_CHECK_TRUE_MSG(b_shape.size() >= DIMENSION_2D, RET_ERROR, "B-metric tensor's shape is invalid.");
1488+  int batch = 1;
1489+  for (size_t i = 0; i < b_shape.size() - DIMENSION_2D; ++i) {
1490+    batch *= b_shape[i];
1491+  }
1492+  b_batch_ = batch;
1493+  params_->batch = batch;
1494+  params_->col_ = params_->b_transpose_ ? b_shape[b_shape.size() - C2NUM] : b_shape[b_shape.size() - C1NUM];
1495+  params_->col_8_ = UP_ROUND(params_->col_, C8NUM);
1496+  params_->deep_ = params_->b_transpose_ ? b_shape[b_shape.size() - C1NUM] : b_shape[b_shape.size() - C2NUM];
1497+  return RET_OK;
1498+}
1499+
// Binds the parameter struct and weight/bias tensors, records which operands
// are compile-time constants, then delegates to the base-class Prepare.
int MatMulFP16Coder::Prepare(CoderContext *const context) {
  if (input_tensor_->data_type() != kNumberTypeFloat16) {
    MS_LOG(INFO) << "Input tensor data type is invalid";
    return RET_INPUT_PARAM_INVALID;
  }
  params_ = reinterpret_cast<MatMulParameter *>(parameter_);
  // Need at least activation + weight inputs before indexing kWeightIndex.
  MS_CHECK_TRUE_RET(input_tensors_.size() >= kBiasIndex, RET_ERROR);
  filter_tensor_ = input_tensors_.at(kWeightIndex);
  MS_CHECK_PTR(filter_tensor_);
  if (input_tensors_.size() == kInputSize2) {
    // Optional third input is the bias; it must carry constant data.
    bias_tensor_ = input_tensors_.at(kBiasIndex);
    MS_CHECK_PTR(bias_tensor_);
    MS_CHECK_PTR(bias_tensor_->data());
  }
  // An operand with data attached at convert time is treated as constant, so
  // its packing can be moved into the weight-initialization section.
  params_->a_const_ = (input_tensor_->data() != nullptr);
  params_->b_const_ = (filter_tensor_->data() != nullptr);
  MS_CHECK_RET_CODE(MatMulFP16BaseCoder::Prepare(context), "MatMulFP16Coder prepare failed");
  return RET_OK;
}
1519+
1520+int MatMulFP16Coder::DoCode(CoderContext *const context) { return MatMulFP16BaseCoder::DoCode(context); }
1521+
1522+REG_OPERATOR_CODER(kARM64, kNumberTypeFloat16, PrimitiveType_MatMulFusion, CPUOpCoderCreator<MatMulFP16Coder>)
1523+REG_OPERATOR_CODER(kARM32, kNumberTypeFloat16, PrimitiveType_MatMulFusion, CPUOpCoderCreator<MatMulFP16Coder>)
1524+}  // namespace mindspore::lite::micro::nnacl
1525diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/matmul_fp16_coder.h b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/matmul_fp16_coder.h
1526new file mode 100644
1527index 00000000..3a1cb66a
1528--- /dev/null
1529+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/matmul_fp16_coder.h
1530@@ -0,0 +1,44 @@
1531+/**
1532+ * Copyright 2023 Huawei Technologies Co., Ltd
1533+ *
1534+ * Licensed under the Apache License, Version 2.0 (the "License");
1535+ * you may not use this file except in compliance with the License.
1536+ * You may obtain a copy of the License at
1537+ *
1538+ * http://www.apache.org/licenses/LICENSE-2.0
1539+ *
1540+ * Unless required by applicable law or agreed to in writing, software
1541+ * distributed under the License is distributed on an "AS IS" BASIS,
1542+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1543+ * See the License for the specific language governing permissions and
1544+ * limitations under the License.
1545+ */
1546+
1547+#ifndef MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_MATMUL_FP16_CODER_H_
1548+#define MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_MATMUL_FP16_CODER_H_
1549+
1550+#include <vector>
1551+#include "coder/opcoders/nnacl/fp16/matmul_fp16_base_coder.h"
1552+#include "nnacl/matmul_parameter.h"
1553+
1554+namespace mindspore::lite::micro::nnacl {
// Op coder for PrimitiveType_MatMulFusion in fp16. Supplies the shape
// derivation hooks (InitAShape/InitBShape) on top of MatMulFP16BaseCoder.
class MatMulFP16Coder final : public MatMulFP16BaseCoder {
 public:
  MatMulFP16Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
                  const LiteGraph::Node *node, size_t node_index, Target target)
      : MatMulFP16BaseCoder(in_tensors, out_tensors, node, node_index, target) {
    // fp16 elements are two bytes; used by the base class for buffer sizing.
    data_type_size_ = sizeof(uint16_t);
  }

  ~MatMulFP16Coder() override = default;

  int Prepare(CoderContext *const context) override;

  int DoCode(CoderContext *const context) override;

 private:
  int InitAShape() override;
  int InitBShape() override;
};
1573+}  // namespace mindspore::lite::micro::nnacl
1574+#endif  // MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_MATMUL_FP16_CODER_H_
1575diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/reduce_fp16_coder.cc b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/reduce_fp16_coder.cc
1576new file mode 100644
1577index 00000000..2f289085
1578--- /dev/null
1579+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/reduce_fp16_coder.cc
1580@@ -0,0 +1,75 @@
1581+/**
1582+ * Copyright 2023 Huawei Technologies Co., Ltd
1583+ *
1584+ * Licensed under the Apache License, Version 2.0 (the "License");
1585+ * you may not use this file except in compliance with the License.
1586+ * You may obtain a copy of the License at
1587+ *
1588+ * http://www.apache.org/licenses/LICENSE-2.0
1589+ *
1590+ * Unless required by applicable law or agreed to in writing, software
1591+ * distributed under the License is distributed on an "AS IS" BASIS,
1592+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1593+ * See the License for the specific language governing permissions and
1594+ * limitations under the License.
1595+ */
1596+
1597+#include "coder/opcoders/nnacl/fp16/reduce_fp16_coder.h"
1598+#include "coder/opcoders/file_collector.h"
1599+
1600+using mindspore::schema::PrimitiveType_ReduceFusion;
1601+namespace mindspore::lite::micro::nnacl {
// Initializes the reduce base state, switches the working dtype to fp16, and
// allocates fp16-sized temporary buffers for the multi-axis reduction.
int ReduceFP16Coder::Prepare(CoderContext *const context) {
  MS_CHECK_RET_CODE(ReduceBaseCoder::Init(), "init failed");
  // Must be set before ReSize/MallocTmpBuffer so sizes are computed for fp16.
  data_type_ = ::kNumberTypeFloat16;
  MS_CHECK_RET_CODE(ReduceBaseCoder::ReSize(), "resize failed");
  MS_CHECK_RET_CODE(ReduceFP32Coder::MallocTmpBuffer(kNumberTypeFloat16), "malloc buffer failed");
  return RET_OK;
}
1609+
1610+int ReduceFP16Coder::DoCode(CoderContext *const context) {
1611+  Collect(context,
1612+          {
1613+            "nnacl/fp16/reduce_fp16.h",
1614+          },
1615+          {
1616+            "reduce_fp16.c",
1617+          });
1618+
1619+  // call the op function
1620+  switch (mode_) {
1621+    case static_cast<int>(schema::ReduceMode_ReduceSum): {
1622+      reduce_ = "ReduceSumFp16";
1623+      break;
1624+    }
1625+    case static_cast<int>(schema::ReduceMode_ReduceMean): {
1626+      reduce_ = "ReduceMeanFp16";
1627+      break;
1628+    }
1629+    case static_cast<int>(schema::ReduceMode_ReduceMax): {
1630+      reduce_ = "ReduceMaxFp16";
1631+      break;
1632+    }
1633+    case static_cast<int>(schema::ReduceMode_ReduceMin): {
1634+      reduce_ = "ReduceMinFp16";
1635+      break;
1636+    }
1637+    case static_cast<int>(schema::ReduceMode_ReduceProd): {
1638+      reduce_ = "ReduceProdFp16";
1639+      break;
1640+    }
1641+    case static_cast<int>(schema::ReduceMode_ReduceL2): {
1642+      reduce_ = "ReduceL2NormFp16";
1643+      break;
1644+    }
1645+    default:
1646+      MS_LOG(ERROR) << "Reduce unsupported reduce_ mode: " << mode_;
1647+      return RET_ERROR;
1648+  }
1649+  GenerateCode(context);
1650+  return RET_OK;
1651+}
1652+
1653+REG_OPERATOR_CODER(kARM32, kNumberTypeFloat16, PrimitiveType_ReduceFusion, CPUOpCoderCreator<ReduceFP16Coder>)
1654+REG_OPERATOR_CODER(kARM64, kNumberTypeFloat16, PrimitiveType_ReduceFusion, CPUOpCoderCreator<ReduceFP16Coder>)
1655+}  // namespace mindspore::lite::micro::nnacl
1656diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/reduce_fp16_coder.h b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/reduce_fp16_coder.h
1657new file mode 100644
1658index 00000000..2fcf8fb4
1659--- /dev/null
1660+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/reduce_fp16_coder.h
1661@@ -0,0 +1,40 @@
1662+/**
1663+ * Copyright 2023 Huawei Technologies Co., Ltd
1664+ *
1665+ * Licensed under the Apache License, Version 2.0 (the "License");
1666+ * you may not use this file except in compliance with the License.
1667+ * You may obtain a copy of the License at
1668+ *
1669+ * http://www.apache.org/licenses/LICENSE-2.0
1670+ *
1671+ * Unless required by applicable law or agreed to in writing, software
1672+ * distributed under the License is distributed on an "AS IS" BASIS,
1673+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1674+ * See the License for the specific language governing permissions and
1675+ * limitations under the License.
1676+ */
1677+
1678+#ifndef MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_REDUCE_FP16_CODER_H_
1679+#define MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_REDUCE_FP16_CODER_H_
1680+
1681+#include <string>
1682+#include <vector>
1683+#include "coder/opcoders/nnacl/fp32/reduce_fp32_coder.h"
1684+#include "coder/opcoders/base/reduce_base_coder.h"
1685+#include "coder/opcoders/op_coder.h"
1686+
1687+namespace mindspore::lite::micro::nnacl {
// Op coder for PrimitiveType_ReduceFusion in fp16. Reuses the fp32 reduce
// coder's machinery, overriding only dtype setup and kernel-name selection.
class ReduceFP16Coder final : public ReduceFP32Coder {
 public:
  ReduceFP16Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
                  const LiteGraph::Node *node, size_t node_index, Target target)
      : ReduceFP32Coder(in_tensors, out_tensors, node, node_index, target) {}

  ~ReduceFP16Coder() override = default;

  int Prepare(CoderContext *const context) override;

  int DoCode(CoderContext *const context) override;
};
1700+}  // namespace mindspore::lite::micro::nnacl
1701+#endif  // MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_REDUCE_FP16_CODER_H_
1702diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/resize_fp16_coder.cc b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/resize_fp16_coder.cc
1703new file mode 100644
1704index 00000000..b6b07570
1705--- /dev/null
1706+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/resize_fp16_coder.cc
1707@@ -0,0 +1,108 @@
1708+/**
1709+ * Copyright 2023 Huawei Technologies Co., Ltd
1710+ *
1711+ * Licensed under the Apache License, Version 2.0 (the "License");
1712+ * you may not use this file except in compliance with the License.
1713+ * You may obtain a copy of the License at
1714+ *
1715+ * http://www.apache.org/licenses/LICENSE-2.0
1716+ *
1717+ * Unless required by applicable law or agreed to in writing, software
1718+ * distributed under the License is distributed on an "AS IS" BASIS,
1719+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1720+ * See the License for the specific language governing permissions and
1721+ * limitations under the License.
1722+ */
1723+
1724+#include "coder/opcoders/nnacl/fp16/resize_fp16_coder.h"
1725+#include <string>
1726+#include <map>
1727+#include <utility>
1728+#include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"
1729+#include "coder/opcoders/file_collector.h"
1730+#include "coder/opcoders/parallel.h"
1731+#include "coder/utils/common.h"
1732+#include "nnacl/fp32/resize_fp32.h"
1733+
1734+using mindspore::schema::PrimitiveType_Resize;
1735+
1736+namespace mindspore::lite::micro::nnacl {
1737+int ResizeFP16Coder::DataTypeLen() { return sizeof(uint16_t); }
1738+
// Generates fp16 resize code. Precomputed fp32 interpolation weights (from the
// base coder) are converted to fp16 and embedded as constant arrays; the
// kernel invoked depends on the resize method (bilinear / nearest / bicubic).
int ResizeFP16Coder::DoCode(CoderContext *const context) {
  Collect(context,
          {
            "nnacl/fp16/resize_fp16.h",
            "nnacl/fp32/resize_fp32.h",
          },
          {
            "resize_fp16.c",
            "resize_fp32.c",
          });
  nnacl::NNaclFp32Serializer code;
  code.CodeArray("input_shape", input_tensor_->shape().data(), input_tensor_->shape().size(), true);
  code.CodeArray("output_shape", output_tensor_->shape().data(), output_tensor_->shape().size(), true);
  // Convert the fp32 interpolation weights prepared by the base coder to fp16.
  std::vector<uint16_t> y_weights(y_weight_len_);
  Float32ToFp16(y_weights_, y_weights.data(), y_weight_len_);
  std::vector<uint16_t> x_weights(x_weight_len_);
  Float32ToFp16(x_weights_, x_weights.data(), x_weight_len_);
  // Static single-task split of the output rows.
  int unit = UP_DIV(new_height_, kDefaultThreadNum);
  int h_begin = unit * kDefaultTaskId;
  int h_end = std::min(h_begin + unit, new_height_);
  int channel = input_tensor_->Channel();

  switch (method_) {
    case static_cast<int>(schema::ResizeMethod_LINEAR): {
      // Bilinear: emit coordinate/weight tables, a prepare call, and two
      // scratch row buffers (line0/line1) for the row-interpolation kernel.
      code.CodeArray("y_bottoms", coordinate_.y_bottoms_, y_len_, true);
      code.CodeArray("y_tops", coordinate_.y_tops_, y_len_, true);
      code.CodeArray("x_lefts", coordinate_.x_lefts_, x_len_, true);
      code.CodeArray("x_rights", coordinate_.x_rights_, x_len_, true);
      code.CodeArray("y_weights", y_weights.data(), y_weight_len_, true);
      code.CodeArray("x_weights", x_weights.data(), x_weight_len_, true);

      code.CodeFunction("PrepareResizeBilinearFp16", "input_shape", "output_shape", calculate_str_, "(int *)y_bottoms",
                        "(int *)y_tops", "(int *)x_lefts", "(int *)x_rights", "(float16_t *)y_weights",
                        "(float16_t *)x_weights");
      // NOTE(review): line0's per-task offset uses new_width_ * 2 while line1
      // strides by new_width_ * channel — looks inconsistent for channel != 2;
      // confirm against the fp32 resize coder's buffer layout.
      code << "    float16_t *line0 = (float16_t *)" << MemoryAllocator::GetInstance()->GetRuntimeAddr(line_buffer_)
           << " + " << new_width_ << " * 2 * " << kDefaultTaskId << ";\n";
      code << "    float16_t *line1 = line0 + " << new_width_ << " * " << channel << ";\n";
      code.CodeFunction("ResizeBilinearFp16", input_tensor_, output_tensor_, "input_shape", "output_shape", "y_bottoms",
                        "y_tops", "x_lefts", "x_rights", "(float16_t *)y_weights", "(float16_t *)x_weights", "line0",
                        "line1", h_begin, h_end);
      break;
    }
    case static_cast<int>(schema::ResizeMethod_NEAREST): {
      // Nearest-neighbor needs no precomputed tables.
      code.CodeFunction("ResizeNearestNeighborFp16", input_tensor_, output_tensor_, "input_shape", "output_shape",
                        calculate_str_, coordinate_transform_mode_, kDefaultTaskId, kDefaultThreadNum);
      break;
    }
    case static_cast<int>(schema::ResizeMethod_CUBIC): {
      // Bicubic: 4-tap interpolation; scratch buffer holds 4 rows per task.
      code.CodeArray("y_tops", coordinate_.y_tops_, y_len_, true);
      code.CodeArray("x_lefts", coordinate_.x_lefts_, x_len_, true);
      code.CodeArray("y_weights", y_weights.data(), y_weight_len_, true);
      code.CodeArray("x_weights", x_weights.data(), x_weight_len_, true);
      auto resize_parameter = reinterpret_cast<ResizeParameter *>(parameter_);
      MS_CHECK_PTR(resize_parameter);
      auto cubic_coeff_str = "(float16_t)" + std::to_string(resize_parameter->cubic_coeff_);
      code.CodeFunction("PrepareResizeBicubicFp16", "input_shape", "output_shape", calculate_str_, "(int *)y_tops",
                        "(int *)x_lefts", "(float16_t *)y_weights", "(float16_t *)x_weights", cubic_coeff_str);
      auto buffer_str = "(float16_t *)" + MemoryAllocator::GetInstance()->GetRuntimeAddr(line_buffer_) + " + " +
                        std::to_string(new_width_ * channel * 4 * kDefaultTaskId);

      code.CodeFunction("ResizeBicubicFp16", input_tensor_, output_tensor_, "input_shape", "output_shape", "y_tops",
                        "x_lefts", "(float16_t *)y_weights", "(float16_t *)x_weights", buffer_str, h_begin, h_end);
      break;
    }
    default: {
      MS_LOG(ERROR) << "Resize unknown method " << method_;
      return RET_ERROR;
    }
  }

  context->AppendCode(code.str());
  return RET_OK;
}
1812+
1813+REG_OPERATOR_CODER(kARM32, kNumberTypeFloat16, PrimitiveType_Resize, CPUOpCoderCreator<ResizeFP16Coder>)
1814+REG_OPERATOR_CODER(kARM64, kNumberTypeFloat16, PrimitiveType_Resize, CPUOpCoderCreator<ResizeFP16Coder>)
1815+}  // namespace mindspore::lite::micro::nnacl
1816diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/resize_fp16_coder.h b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/resize_fp16_coder.h
1817new file mode 100644
1818index 00000000..0a050f59
1819--- /dev/null
1820+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/resize_fp16_coder.h
1821@@ -0,0 +1,41 @@
1822+/**
1823+ * Copyright 2023 Huawei Technologies Co., Ltd
1824+ *
1825+ * Licensed under the Apache License, Version 2.0 (the "License");
1826+ * you may not use this file except in compliance with the License.
1827+ * You may obtain a copy of the License at
1828+ *
1829+ * http://www.apache.org/licenses/LICENSE-2.0
1830+ *
1831+ * Unless required by applicable law or agreed to in writing, software
1832+ * distributed under the License is distributed on an "AS IS" BASIS,
1833+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1834+ * See the License for the specific language governing permissions and
1835+ * limitations under the License.
1836+ */
1837+
1838+#ifndef MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_RESIZE_FP16_CODER_H_
1839+#define MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_RESIZE_FP16_CODER_H_
1840+
1841+#include "coder/opcoders/nnacl/fp32/resize_fp32_coder.h"
1842+#include <vector>
1843+#include <algorithm>
1844+#include <string>
1845+#include "include/errorcode.h"
1846+#include "src/runtime/kernel_exec.h"
1847+#include "nnacl/base/cast_base.h"
1848+
1849+namespace mindspore::lite::micro::nnacl {
1850+class ResizeFP16Coder : public ResizeFP32Coder {
1851+ public:
1852+  ResizeFP16Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
1853+                  const LiteGraph::Node *node, size_t node_index, Target target)
1854+      : ResizeFP32Coder(in_tensors, out_tensors, node, node_index, target) {}
1855+  ~ResizeFP16Coder() override { FreeTmpBuffer(); };
1856+  int DoCode(CoderContext *const context) override;
1857+
1858+ private:
1859+  int DataTypeLen() override;
1860+};
1861+}  // namespace mindspore::lite::micro::nnacl
1862+#endif  // MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_RESIZE_FP16_CODER_H_
1863diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/transpose_fp16_coder.cc b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/transpose_fp16_coder.cc
1864new file mode 100644
1865index 00000000..cddcac53
1866--- /dev/null
1867+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/transpose_fp16_coder.cc
1868@@ -0,0 +1,140 @@
1869+/**
1870+ * Copyright 2023 Huawei Technologies Co., Ltd
1871+ *
1872+ * Licensed under the Apache License, Version 2.0 (the "License");
1873+ * you may not use this file except in compliance with the License.
1874+ * You may obtain a copy of the License at
1875+ *
1876+ * http://www.apache.org/licenses/LICENSE-2.0
1877+ *
1878+ * Unless required by applicable law or agreed to in writing, software
1879+ * distributed under the License is distributed on an "AS IS" BASIS,
1880+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1881+ * See the License for the specific language governing permissions and
1882+ * limitations under the License.
1883+ */
1884+
1885+#include "coder/opcoders/nnacl/fp16/transpose_fp16_coder.h"
1886+#include <vector>
1887+#include <unordered_set>
1888+#include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"
1889+#include "coder/opcoders/file_collector.h"
1890+#include "coder/opcoders/parallel.h"
1891+#include "coder/utils/common.h"
1892+
1893+using mindspore::schema::PrimitiveType_Transpose;
1894+namespace mindspore::lite::micro::nnacl {
1895+int TransposeFp16Coder::Prepare(CoderContext *const context) {
1896+  if (input_tensor_->data_type() != kNumberTypeFloat16) {
1897+    MS_LOG(INFO) << "Input tensor data type is invalid";
1898+    return lite::RET_INPUT_PARAM_INVALID;
1899+  }
1900+  MS_CHECK_RET_CODE(Init(), "init failed");
1901+  return RET_OK;
1902+}
1903+
1904+int TransposeFp16Coder::ResetStatus() {
1905+  param_->num_axes_ = 0;
1906+  if (input_tensors_.size() == C2NUM) {
1907+    param_->num_axes_ = input_tensors_[SECOND_INPUT]->ElementsNum();
1908+  }
1909+  auto in_shape = input_tensors_[FIRST_INPUT]->shape();
1910+  if (in_shape.size() > MAX_TRANSPOSE_DIM_SIZE) {
1911+    MS_LOG(ERROR) << "input shape out of range.";
1912+    return RET_ERROR;
1913+  }
1914+  int trans_nd[MAX_TRANSPOSE_DIM_SIZE] = {0, 2, 1};
1915+  int *perm_data{nullptr};
1916+  if (in_shape.size() != static_cast<size_t>(param_->num_axes_)) {
1917+    perm_data = trans_nd;
1918+    if (in_shape.size() == C3NUM && param_->num_axes_ == C4NUM) {
1919+      param_->num_axes_ = C3NUM;
1920+    }
1921+    if (param_->num_axes_ == 0) {
1922+      for (int i = 0; i < static_cast<int>(in_shape.size()); ++i) {
1923+        trans_nd[i] = static_cast<int>(in_shape.size()) - 1 - i;
1924+      }
1925+      param_->num_axes_ = static_cast<int>(in_shape.size());
1926+    }
1927+  } else {
1928+    MS_ASSERT(input_tensors_.size() == C2NUM);
1929+    auto perm_tensor = input_tensors_.at(SECOND_INPUT);
1930+    if (perm_tensor->data_type() != kNumberTypeInt32) {
1931+      MS_LOG(ERROR) << "Unsupported type id: " << perm_tensor->data_type() << " of perm tensor.";
1932+      return RET_ERROR;
1933+    }
1934+    perm_data = reinterpret_cast<int *>(perm_tensor->data());
1935+    MSLITE_CHECK_PTR(perm_data);
1936+    std::vector<int> perm(perm_data, perm_data + input_tensors_[SECOND_INPUT]->ElementsNum());
1937+    if (perm.size() != std::unordered_set<int>(perm.cbegin(), perm.cend()).size()) {
1938+      MS_LOG(ERROR) << "Invalid perm, the same element exits in perm.";
1939+      return RET_ERROR;
1940+    }
1941+  }
1942+  MS_CHECK_TRUE_MSG(param_->num_axes_ <= MAX_TRANSPOSE_DIM_SIZE, RET_ERROR, "transpose perm is invalid.");
1943+  for (int i = 0; i < param_->num_axes_; ++i) {
1944+    param_->perm_[i] = perm_data[i];
1945+  }
1946+  return RET_OK;
1947+}
1948+
1949+int TransposeFp16Coder::ComputeOfflineInfo() {
1950+  auto in_shape = input_tensor_->shape();
1951+  auto out_shape = output_tensor_->shape();
1952+  param_->strides_[param_->num_axes_ - 1] = 1;
1953+  param_->out_strides_[param_->num_axes_ - 1] = 1;
1954+  param_->data_num_ = input_tensor_->ElementsNum();
1955+  for (int i = param_->num_axes_ - 2; i >= 0; i--) {
1956+    param_->strides_[i] = in_shape.at(i + 1) * param_->strides_[i + 1];
1957+    param_->out_strides_[i] = out_shape.at(i + 1) * param_->out_strides_[i + 1];
1958+  }
1959+  return RET_OK;
1960+}
1961+
1962+int TransposeFp16Coder::Resize() {
1963+  auto ret = ResetStatus();
1964+  if (ret != RET_OK) {
1965+    MS_LOG(ERROR) << "Do transpose reset failed.";
1966+    return ret;
1967+  }
1968+  if (input_tensors_[FIRST_INPUT]->shape().size() != static_cast<size_t>(param_->num_axes_)) {
1969+    return RET_OK;
1970+  }
1971+  ret = ComputeOfflineInfo();
1972+  if (ret != RET_OK) {
1973+    MS_LOG(ERROR) << "Do compute transpose offline info failed.";
1974+    return ret;
1975+  }
1976+  thread_num_ = 1;
1977+  return RET_OK;
1978+}
1979+
1980+int TransposeFp16Coder::DoCode(CoderContext *const context) {
1981+  Collect(context,
1982+          {
1983+            "nnacl/transpose.h",
1984+            "nnacl/errorcode.h",
1985+            "nnacl/fp16/transpose_fp16.h",
1986+          },
1987+          {
1988+            "transpose_fp16.c",
1989+          });
1990+
1991+  NNaclFp32Serializer code;
1992+  if (input_tensor_->data() != output_tensor_->data()) {
1993+    code.CodeFunction("memcpy", output_tensor_, input_tensor_, input_tensor_->Size());
1994+    context->AppendCode(code.str());
1995+  }
1996+
1997+  auto out_shape = output_tensor_->shape();
1998+  dims_ = static_cast<int>(out_shape.size());
1999+  code.CodeArray("output_shape", out_shape.data(), dims_, true);
2000+  code.CodeStruct("trans_param", *param_);
2001+  code.CodeFunction("DoTransposeFp16", input_tensor_, output_tensor_, "output_shape", "&trans_param");
2002+  context->AppendCode(code.str());
2003+  return RET_OK;
2004+}
2005+
2006+REG_OPERATOR_CODER(kARM32, kNumberTypeFloat16, PrimitiveType_Transpose, CPUOpCoderCreator<TransposeFp16Coder>)
2007+REG_OPERATOR_CODER(kARM64, kNumberTypeFloat16, PrimitiveType_Transpose, CPUOpCoderCreator<TransposeFp16Coder>)
2008+}  // namespace mindspore::lite::micro::nnacl
2009diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/transpose_fp16_coder.h b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/transpose_fp16_coder.h
2010new file mode 100644
2011index 00000000..240c470e
2012--- /dev/null
2013+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp16/transpose_fp16_coder.h
2014@@ -0,0 +1,43 @@
2015+/**
2016+ * Copyright 2023 Huawei Technologies Co., Ltd
2017+ *
2018+ * Licensed under the Apache License, Version 2.0 (the "License");
2019+ * you may not use this file except in compliance with the License.
2020+ * You may obtain a copy of the License at
2021+ *
2022+ * http://www.apache.org/licenses/LICENSE-2.0
2023+ *
2024+ * Unless required by applicable law or agreed to in writing, software
2025+ * distributed under the License is distributed on an "AS IS" BASIS,
2026+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
2027+ * See the License for the specific language governing permissions and
2028+ * limitations under the License.
2029+ */
2030+
2031+#ifndef MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_TRANSPOSE_FP16_CODER_H_
2032+#define MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_TRANSPOSE_FP16_CODER_H_
2033+#include <vector>
2034+#include <string>
2035+#include "coder/opcoders/nnacl/fp32/transpose_fp32_coder.h"
2036+#include "nnacl/transpose.h"
2037+namespace mindspore::lite::micro::nnacl {
2038+class TransposeFp16Coder final : public TransposeFp32Coder {
2039+ public:
2040+  TransposeFp16Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
2041+                     const LiteGraph::Node *node, size_t node_index, Target target)
2042+      : TransposeFp32Coder(in_tensors, out_tensors, node, node_index, target) {}
2043+
2044+  ~TransposeFp16Coder() override = default;
2045+
2046+  int Prepare(CoderContext *const context) override;
2047+
2048+  int Resize() override;
2049+
2050+  int DoCode(CoderContext *const context) override;
2051+
2052+ private:
2053+  int ResetStatus();
2054+  int ComputeOfflineInfo();
2055+};
2056+}  // namespace mindspore::lite::micro::nnacl
2057+#endif  // MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP16_TRANSPOSE_FP16_CODER_H_
2058diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/activation_fp32_coder.cc b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/activation_fp32_coder.cc
2059index 35fc1819..edc442e9 100644
2060--- a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/activation_fp32_coder.cc
2061+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/activation_fp32_coder.cc
2062@@ -14,9 +14,7 @@
2063  * limitations under the License.
2064  */
2065 #include "coder/opcoders/nnacl/fp32/activation_fp32_coder.h"
2066-#include <string>
2067 #include "nnacl/fp32/activation_fp32.h"
2068-#include "nnacl/op_base.h"
2069 #include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"
2070 #include "coder/opcoders/file_collector.h"
2071 #include "coder/opcoders/parallel.h"
2072@@ -65,9 +63,15 @@ int ActivationFP32Coder::DoCode(CoderContext *const context) {
2073     case schema::ActivationType_HSWISH:
2074       code.CodeFunction("HSwish", input_tensor_, count, output_tensor_);
2075       break;
2076+    case schema::ActivationType_SWISH:
2077+      code.CodeFunction("Swish", input_tensor_, count, output_tensor_);
2078+      break;
2079     case schema::ActivationType_HSIGMOID:
2080       code.CodeFunction("HSigmoid", input_tensor_, count, output_tensor_);
2081       break;
2082+    case schema::ActivationType_ELU:
2083+      code.CodeFunction("Elu", input_tensor_, count, output_tensor_, activation_parameter->alpha_);
2084+      break;
2085     default:
2086       MS_LOG(ERROR) << "Activation type error";
2087       return RET_ERROR;
2088diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/activation_fp32_coder.h b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/activation_fp32_coder.h
2089index 67be7e6d..f688d4bd 100644
2090--- a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/activation_fp32_coder.h
2091+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/activation_fp32_coder.h
2092@@ -21,7 +21,7 @@
2093 #include "coder/opcoders/op_coder.h"
2094
2095 namespace mindspore::lite::micro::nnacl {
2096-class ActivationFP32Coder final : public OperatorCoder {
2097+class ActivationFP32Coder : public OperatorCoder {
2098  public:
2099   ActivationFP32Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
2100                       const LiteGraph::Node *node, size_t node_index, Target target)
2101diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/arithmetic_fp32_coder.cc b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/arithmetic_fp32_coder.cc
2102index d7cc8726..49e53cf9 100644
2103--- a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/arithmetic_fp32_coder.cc
2104+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/arithmetic_fp32_coder.cc
2105@@ -27,8 +27,8 @@ std::string wrap_uint8(const std::string &a) { return "(uint8_t *)(" + a + ")";
2106 std::string wrap_offset(const std::string &a, int offset) { return "(" + a + "+" + std::to_string(offset) + ")"; }
2107 }  // namespace
2108
2109-void ArithmeticFP32Coder::InitRunFunction(int primitive_type) {
2110-  ARITHMETIC_FUNC_INFO_FP32 fun_table[] = {
2111+void ArithmeticFP32Coder::InitFunTable() {
2112+  fun_table_ = {
2113     {PrimitiveType_MulFusion, schema::ActivationType_RELU, "ElementMulRelu", "ElementMulReluInt", "",
2114      "ElementOptMulRelu", "ElementOptMulReluInt"},
2115     {PrimitiveType_MulFusion, schema::ActivationType_RELU6, "ElementMulRelu6", "ElementMulRelu6Int", "",
2116@@ -63,21 +63,23 @@ void ArithmeticFP32Coder::InitRunFunction(int primitive_type) {
2117      "ElementOptModInt"},
2118     {PrimitiveType_SquaredDifference, schema::ActivationType_NO_ACTIVATION, "ElementSquaredDifference", "", "", "",
2119      ""}};
2120+}
2121
2122-  size_t length = sizeof(fun_table) / sizeof(ARITHMETIC_FUNC_INFO_FP32);
2123-  for (size_t i = 0; i < length; i++) {
2124-    if (fun_table[i].primitive_type_ == primitive_type &&
2125-        fun_table[i].activation_type_ == arithmetic_parameter_->activation_type_) {
2126-      arithmetic_run_ = fun_table[i].func_;
2127-      arithmetic_run_int_ = fun_table[i].int_func_;
2128-      arithmetic_run_bool_ = fun_table[i].bool_func_;
2129-      arithmetic_opt_run_ = fun_table[i].opt_func_;
2130-      arithmetic_opt_run_int_ = fun_table[i].opt_int_func_;
2131+void ArithmeticFP32Coder::InitRunFunction(int primitive_type) {
2132+  InitFunTable();
2133+  for (size_t i = 0; i < fun_table_.size(); i++) {
2134+    if (fun_table_[i].primitive_type_ == primitive_type &&
2135+        fun_table_[i].activation_type_ == arithmetic_parameter_->activation_type_) {
2136+      arithmetic_run_ = fun_table_[i].func_;
2137+      arithmetic_run_int_ = fun_table_[i].int_func_;
2138+      arithmetic_run_bool_ = fun_table_[i].bool_func_;
2139+      arithmetic_opt_run_ = fun_table_[i].opt_func_;
2140+      arithmetic_opt_run_int_ = fun_table_[i].opt_int_func_;
2141     }
2142   }
2143   TypeId input_type_id = input_tensor_->data_type();
2144   data_type_len_ = lite::DataTypeSize(input_tensor_->data_type());
2145-  if (input_type_id == kNumberTypeFloat32 || input_type_id == kNumberTypeFloat) {
2146+  if (input_type_id == kNumberTypeFloat32 || input_type_id == kNumberTypeFloat || input_type_id == kNumberTypeFloat16) {
2147     arithmetic_func_type_ = kArithmeticFuncFloat;
2148   } else if (input_type_id == kNumberTypeBool) {
2149     arithmetic_func_type_ = kArithmeticFuncBool;
2150@@ -122,7 +124,8 @@ int ArithmeticFP32Coder::CheckDataType() {
2151 }
2152
2153 void ArithmeticFP32Coder::ChooseArithmeticFunc(bool is_opt) {
2154-  if (input_tensor_->data_type() == kNumberTypeFloat32) {
2155+  if (input_tensor_->data_type() == kNumberTypeFloat32 || input_tensor_->data_type() == kNumberTypeFloat ||
2156+      input_tensor_->data_type() == kNumberTypeFloat16) {
2157     if (is_opt) {
2158       arithmetic_func_str_ = wrap_void(arithmetic_opt_run_);
2159     } else {
2160@@ -204,13 +207,8 @@ int ArithmeticFP32Coder::ConstTensorBroadCast(CoderContext *const context) {
2161   }
2162   FreeConstTileBuff();
2163   NNaclFp32Serializer init_code;
2164-  Collect(context,
2165-          {
2166-            "wrapper/fp32/arithmetic_fp32_wrapper.h",
2167-          },
2168-          {
2169-            "arithmetic_fp32_wrapper.c",
2170-          });
2171+  Collect(context, {"wrapper/fp32/arithmetic_fp32_wrapper.h", "nnacl/fp32/arithmetic_fp32.h"},
2172+          {"arithmetic_fp32_wrapper.c", "arithmetic_fp32.c"});
2173   if (input_tensor_->IsConst() &&
2174       arithmetic_parameter_->in_elements_num0_ != arithmetic_parameter_->out_elements_num_) {
2175     input0_ptr_ = reinterpret_cast<float *>(
2176diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/arithmetic_fp32_coder.h b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/arithmetic_fp32_coder.h
2177index e1ec51fe..169ed457 100644
2178--- a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/arithmetic_fp32_coder.h
2179+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/arithmetic_fp32_coder.h
2180@@ -66,7 +66,7 @@ using mindspore::schema::PrimitiveType_Minimum;
2181
2182 using mindspore::schema::PrimitiveType_Mod;
2183
2184-class ArithmeticFP32Coder final : public OperatorCoder {
2185+class ArithmeticFP32Coder : public OperatorCoder {
2186   typedef struct {
2187     int primitive_type_;
2188     int activation_type_;
2189@@ -88,14 +88,7 @@ class ArithmeticFP32Coder final : public OperatorCoder {
2190
2191   int DoCode(CoderContext *const context) override;
2192
2193- private:
2194-  int ReSize(CoderContext *const context);
2195-
2196-  int ExecuteCode(const std::string &input0, const std::string &input1, const std::string &output, int size,
2197-                  bool is_opt, CoderContext *const context, NNaclFp32Serializer *const code);
2198-
2199-  void InitRunFunction(int primitive_type);
2200-
2201+ protected:
2202   int CheckDataType();
2203
2204   void ChooseArithmeticFunc(bool is_opt);
2205@@ -108,6 +101,16 @@ class ArithmeticFP32Coder final : public OperatorCoder {
2206
2207   void FreeConstTileBuff();
2208
2209+  virtual void InitFunTable();
2210+
2211+  virtual int ReSize(CoderContext *const context);
2212+
2213+  virtual void InitRunFunction(int primitive_type);
2214+
2215+ private:
2216+  int ExecuteCode(const std::string &input0, const std::string &input1, const std::string &output, int size,
2217+                  bool is_opt, CoderContext *const context, NNaclFp32Serializer *const code);
2218+
2219   int ConstTensorBroadCast(CoderContext *const context);
2220
2221   void ComputeInOutStrides();
2222@@ -121,7 +124,9 @@ class ArithmeticFP32Coder final : public OperatorCoder {
2223
2224   void CollectFilesForFunc(CoderContext *const context);
2225
2226- private:
2227+ protected:
2228+  std::vector<ARITHMETIC_FUNC_INFO_FP32> fun_table_;
2229+
2230   int break_pos_{0};
2231
2232   int outside_{0};
2233@@ -148,10 +153,6 @@ class ArithmeticFP32Coder final : public OperatorCoder {
2234
2235   Tensor *filter_tensor_{nullptr};
2236
2237-  ArithmeticFuncType arithmetic_func_type_{kArithmeticFuncUnknow};
2238-
2239-  ArithmeticWrapperInfo arithmetic_wrapper_info_{};
2240-
2241   std::string input0_ptr_str_;
2242
2243   std::string input1_ptr_str_;
2244@@ -169,6 +170,11 @@ class ArithmeticFP32Coder final : public OperatorCoder {
2245   std::string arithmetic_run_bool_;
2246
2247   std::string arithmetic_func_str_;
2248+
2249+ private:
2250+  ArithmeticFuncType arithmetic_func_type_{kArithmeticFuncUnknow};
2251+
2252+  ArithmeticWrapperInfo arithmetic_wrapper_info_{};
2253 };
2254 }  // namespace mindspore::lite::micro::nnacl
2255 #endif  // MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP32_ARITHMETIC_FP32_CODER_H_
2256diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/concat_fp32_coder.h b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/concat_fp32_coder.h
2257index 67607e13..6f3f5c71 100644
2258--- a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/concat_fp32_coder.h
2259+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/concat_fp32_coder.h
2260@@ -22,7 +22,7 @@
2261 #include "nnacl/concat_parameter.h"
2262
2263 namespace mindspore::lite::micro::nnacl {
2264-class ConcatFP32Coder final : public OperatorCoder {
2265+class ConcatFP32Coder : public OperatorCoder {
2266  public:
2267   ConcatFP32Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
2268                   const LiteGraph::Node *node, size_t node_index, Target target)
2269diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/exp_fp32_coder.cc b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/exp_fp32_coder.cc
2270index 65334f4b..e9bbca19 100644
2271--- a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/exp_fp32_coder.cc
2272+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/exp_fp32_coder.cc
2273@@ -49,7 +49,7 @@ int ExpFP32Coder::DoCode(CoderContext *ctx) {
2274           });
2275   nnacl::NNaclFp32Serializer code;
2276   code.CodeStruct("exp_parameter", *exp_parameter_);
2277-  code.CodeFunction("exp", input_tensor_, output_tensor_, "(ExpParameter *)&exp_parameter", kDefaultTaskId);
2278+  code.CodeFunction("ExpFusionFp32", input_tensor_, output_tensor_, "(ExpParameter *)&exp_parameter", kDefaultTaskId);
2279   ctx->AppendCode(code.str());
2280   return RET_OK;
2281 }
2282diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/gather_fp32_coder.cc b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/gather_fp32_coder.cc
2283index 087d4dde..3c31479c 100644
2284--- a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/gather_fp32_coder.cc
2285+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/gather_fp32_coder.cc
2286@@ -94,4 +94,6 @@ int GatherFP32Coder::DoCode(CoderContext *context) {
2287 }
2288
2289 REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_Gather, CPUOpCoderCreator<GatherFP32Coder>)
2290+REG_OPERATOR_CODER(kAllTargets, kNumberTypeInt32, PrimitiveType_Gather, CPUOpCoderCreator<GatherFP32Coder>)
2291+REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat16, PrimitiveType_Gather, CPUOpCoderCreator<GatherFP32Coder>)
2292 }  // namespace mindspore::lite::micro::nnacl
2293diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/layernorm_fp32_coder.cc b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/layernorm_fp32_coder.cc
2294new file mode 100644
2295index 00000000..992a58e4
2296--- /dev/null
2297+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/layernorm_fp32_coder.cc
2298@@ -0,0 +1,81 @@
2299+/**
2300+ * Copyright 2023 Huawei Technologies Co., Ltd
2301+ *
2302+ * Licensed under the Apache License, Version 2.0 (the "License");
2303+ * you may not use this file except in compliance with the License.
2304+ * You may obtain a copy of the License at
2305+ *
2306+ * http://www.apache.org/licenses/LICENSE-2.0
2307+ *
2308+ * Unless required by applicable law or agreed to in writing, software
2309+ * distributed under the License is distributed on an "AS IS" BASIS,
2310+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
2311+ * See the License for the specific language governing permissions and
2312+ * limitations under the License.
2313+ */
2314+#include "coder/opcoders/nnacl/fp32/layernorm_fp32_coder.h"
2315+#include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"
2316+#include "coder/opcoders/file_collector.h"
2317+#include "coder/opcoders/parallel.h"
2318+
2319+using mindspore::schema::PrimitiveType_LayerNormFusion;
2320+
2321+namespace mindspore::lite::micro::nnacl {
2322+namespace {
2323+constexpr size_t kOutputNum = 3;
2324+}
2325+int LayerNormFP32Coder::Prepare(CoderContext *const context) {
2326+  param_ = reinterpret_cast<LayerNormParameter *>(parameter_);
2327+  param_->op_parameter_.thread_num_ = 1;
2328+  auto shape = input_tensor_->shape();
2329+  param_->begin_norm_axis_ = param_->begin_norm_axis_ >= 0 ? param_->begin_norm_axis_
2330+                                                           : param_->begin_norm_axis_ + static_cast<int>(shape.size());
2331+  param_->begin_params_axis_ = param_->begin_params_axis_ >= 0
2332+                                 ? param_->begin_params_axis_
2333+                                 : param_->begin_params_axis_ + static_cast<int>(shape.size());
2334+  MS_CHECK_LT(param_->begin_norm_axis_, static_cast<int>(shape.size()), RET_ERROR);
2335+  MS_CHECK_LT(param_->begin_params_axis_, static_cast<int>(shape.size()), RET_ERROR);
2336+  param_->norm_outer_size_ = 1;
2337+  for (int i = 0; i < param_->begin_norm_axis_; ++i) {
2338+    MS_CHECK_FALSE_MSG(INT_MUL_OVERFLOW(param_->norm_outer_size_, shape.at(i)), RET_ERROR, "mul overflow.");
2339+    param_->norm_outer_size_ *= shape.at(i);
2340+  }
2341+  param_->norm_inner_size_ = 1;
2342+  for (size_t i = param_->begin_norm_axis_; i < shape.size(); ++i) {
2343+    MS_CHECK_FALSE_MSG(INT_MUL_OVERFLOW(param_->norm_inner_size_, shape.at(i)), RET_ERROR, "mul overflow.");
2344+    param_->norm_inner_size_ *= shape.at(i);
2345+  }
2346+  param_->params_outer_size_ = 1;
2347+  for (int i = 0; i < param_->begin_params_axis_; ++i) {
2348+    MS_CHECK_FALSE_MSG(INT_MUL_OVERFLOW(param_->params_outer_size_, shape.at(i)), RET_ERROR, "mul overflow.");
2349+    param_->params_outer_size_ *= shape.at(i);
2350+  }
2351+  param_->params_inner_size_ = 1;
2352+  for (size_t i = param_->begin_params_axis_; i < shape.size(); ++i) {
2353+    MS_CHECK_FALSE_MSG(INT_MUL_OVERFLOW(param_->params_inner_size_, shape.at(i)), RET_ERROR, "mul overflow.");
2354+    param_->params_inner_size_ *= shape.at(i);
2355+  }
2356+  return RET_OK;
2357+}
2358+
2359+int LayerNormFP32Coder::DoCode(CoderContext *const context) {
2360+  NNaclFp32Serializer code;
2361+  code.CodeStruct("layer_norm_parm", *param_);
2362+  Collect(context, {"nnacl/fp32/layer_norm_fp32.h"}, {"layer_norm_fp32.c"});
2363+  if (output_tensors_.size() == kOutputNum) {
2364+    code.CodeFunction("LayerNorm", input_tensor_, input_tensors_.at(SECOND_INPUT), input_tensors_.at(THIRD_INPUT),
2365+                      output_tensor_, output_tensors_.at(SECOND_INPUT), output_tensors_.at(THIRD_INPUT),
2366+                      "&layer_norm_parm", 0);
2367+  } else if (output_tensors_.size() == 1) {
2368+    code.CodeFunction("LayerNorm", input_tensor_, input_tensors_.at(SECOND_INPUT), input_tensors_.at(THIRD_INPUT),
2369+                      output_tensor_, "NULL", "NULL", "&layer_norm_parm", 0);
2370+  } else {
2371+    return RET_ERROR;
2372+  }
2373+  context->AppendCode(code.str());
2374+  return RET_OK;
2375+}
2376+
2377+REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_LayerNormFusion,
2378+                   CPUOpCoderCreator<LayerNormFP32Coder>)
2379+}  // namespace mindspore::lite::micro::nnacl
2380diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/layernorm_fp32_coder.h b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/layernorm_fp32_coder.h
2381new file mode 100644
2382index 00000000..a14cff57
2383--- /dev/null
2384+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/layernorm_fp32_coder.h
2385@@ -0,0 +1,40 @@
2386+/**
2387+ * Copyright 2023 Huawei Technologies Co., Ltd
2388+ *
2389+ * Licensed under the Apache License, Version 2.0 (the "License");
2390+ * you may not use this file except in compliance with the License.
2391+ * You may obtain a copy of the License at
2392+ *
2393+ * http://www.apache.org/licenses/LICENSE-2.0
2394+ *
2395+ * Unless required by applicable law or agreed to in writing, software
2396+ * distributed under the License is distributed on an "AS IS" BASIS,
2397+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
2398+ * See the License for the specific language governing permissions and
2399+ * limitations under the License.
2400+ */
2401+
2402+#ifndef MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP32_LAYERNORM_FP32_CODER_H_
2403+#define MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP32_LAYERNORM_FP32_CODER_H_
2404+
2405+#include <vector>
2406+#include "coder/opcoders/op_coder.h"
2407+#include "nnacl/layer_norm_parameter.h"
2408+
2409+namespace mindspore::lite::micro::nnacl {
2410+class LayerNormFP32Coder : public OperatorCoder {
2411+ public:
2412+  LayerNormFP32Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
2413+                     const LiteGraph::Node *node, size_t node_index, Target target)
2414+      : OperatorCoder(in_tensors, out_tensors, node, node_index, target) {}
2415+  ~LayerNormFP32Coder() override = default;
2416+
2417+  int Prepare(CoderContext *const context) override;
2418+
2419+  int DoCode(CoderContext *const context) override;
2420+
2421+ protected:
2422+  LayerNormParameter *param_{nullptr};
2423+};
2424+}  // namespace mindspore::lite::micro::nnacl
2425+#endif  // MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP32_LAYERNORM_FP32_CODER_H_
2426diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/lstm_fp32_coder.cc b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/lstm_fp32_coder.cc
2427index 3bc8ea4b..561f6259 100644
2428--- a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/lstm_fp32_coder.cc
2429+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/lstm_fp32_coder.cc
2430@@ -75,13 +75,6 @@ int LstmFP32Coder::InitStateWeightBias(CoderContext *const context) {
2431     w_buf_size += weight_h_size;
2432     init_code.CodeFunction("PackLstmWeight", weight_h_ptr_, weight_h, weight_batch_, lstm_param_->hidden_size_,
2433                            lstm_param_->hidden_size_, lstm_param_->state_col_align_, "NULL");
2434-  } else {
2435-    size_t weight_h_size = weight_h->Size();
2436-    weight_h_ptr_ =
2437-      reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, weight_h->Size(), kOfflinePackWeight));
2438-    MS_CHECK_PTR(weight_h_ptr_);
2439-    MS_CHECK_RET_CODE(memcpy_s(weight_h_ptr_, weight_h_size, weight_h->data(), weight_h_size),
2440-                      "copy weight h data failed");
2441   }
2442
2443   state_bias_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, kOnlineSize, kOnlinePackWeight));
2444@@ -214,8 +207,15 @@ int LstmFP32Coder::DoCode(CoderContext *context) {
2445   code.CodeArray("buffer", buffers_addr.data(), buffers_addr.size(), false);
2446   code.CodeFunction("memcpy", output_hidden_state, hidden_state, hidden_state->Size());
2447   code.CodeFunction("memcpy", output_cell_state, cell_state, cell_state->Size());
2448-  code.CodeFunction("Lstm", output_tensor_, input_tensor_, weight_i_ptr_, weight_h_ptr_, input_bias_, state_bias_,
2449-                    output_hidden_state, output_cell_state, "buffer", "&lstm_param");
2450+  if (weight_h_ptr_ != nullptr) {
2451+    code.CodeFunction("Lstm", output_tensor_, input_tensor_, weight_i_ptr_, weight_h_ptr_, input_bias_, state_bias_,
2452+                      output_hidden_state, output_cell_state, "buffer", "&lstm_param");
2453+  } else {
2454+    auto *weight_h_tensor = input_tensors().at(kInputSize1);
2455+    auto weight_h = allocator_->GetRuntimeAddr(weight_h_tensor, weight_h_tensor->IsConst());
2456+    code.CodeFunction("Lstm", output_tensor_, input_tensor_, weight_i_ptr_, weight_h, input_bias_, state_bias_,
2457+                      output_hidden_state, output_cell_state, "buffer", "&lstm_param");
2458+  }
2459   context->AppendCode(code.str());
2460   return RET_OK;
2461 }
2462diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/matmul_fp32_base_coder.cc b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/matmul_fp32_base_coder.cc
2463index 26707a3e..790a142e 100644
2464--- a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/matmul_fp32_base_coder.cc
2465+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/matmul_fp32_base_coder.cc
2466@@ -55,7 +55,8 @@ int MatMulFP32BaseCoder::InitBiasData() {
2467       is_bias_broadcast_ = true;
2468     }
2469     ori_bias_pack_ptr_size_ = bias_tensor_->ElementsNum() * sizeof(float);
2470-    bias_ptr_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, kOnlineSize, kOnlinePackWeight));
2471+    bias_ptr_ = allocator_->Malloc(kNumberTypeFloat32, kOnlineSize, kOnlinePackWeight,
2472+                                   bias_tensor_->tensor_name() + "_online_pack");
2473     MS_CHECK_PTR(bias_ptr_);
2474   }
2475   return RET_OK;
2476@@ -84,7 +85,14 @@ int MatMulFP32BaseCoder::InitBufferA() {
2477   }
2478   a_pack_ptr_size_ = static_cast<size_t>(params_->batch * params_->row_align_ * params_->deep_ * sizeof(float));
2479   if (params_->a_const_) {
2480-    a_pack_ptr_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, kOnlineSize, kOnlinePackWeight));
2481+    a_pack_ptr_ = reinterpret_cast<float *>(allocator_->GetSharedWeightAddr(input_tensors_.at(0)));
2482+    if (a_pack_ptr_ == nullptr) {
2483+      a_pack_ptr_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, kOnlineSize, kOnlinePackWeight,
2484+                                                                 input_tensors_.at(0)->tensor_name() + "_online_pack"));
2485+      allocator_->MarkSharedWeight(input_tensors_.at(0), a_pack_ptr_);
2486+    } else {
2487+      a_packed_ = true;
2488+    }
2489   } else {
2490     a_pack_ptr_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, a_pack_ptr_size_, kWorkspace));
2491   }
2492@@ -96,23 +104,30 @@ int MatMulFP32BaseCoder::InitBufferB() {
2493   if (b_pack_ptr_ != nullptr) {
2494     return RET_OK;
2495   }
2496-  b_pack_ptr_size_ = static_cast<size_t>(params_->batch * params_->col_align_ * params_->deep_ * sizeof(float));
2497+  b_pack_ptr_size_ = static_cast<size_t>(params_->batch * params_->col_align_ * params_->deep_ * data_type_size_);
2498   if (params_->b_const_) {
2499-    b_pack_ptr_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, kOnlineSize, kOnlinePackWeight));
2500+    b_pack_ptr_ = reinterpret_cast<float *>(allocator_->GetSharedWeightAddr(input_tensors_.at(1)));
2501+    if (b_pack_ptr_ == nullptr) {
2502+      b_pack_ptr_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeUInt8, b_pack_ptr_size_, kOnlinePackWeight,
2503+                                                                 input_tensors_.at(1)->tensor_name() + "_online_pack"));
2504+      allocator_->MarkSharedWeight(input_tensors_.at(1), b_pack_ptr_);
2505+    } else {
2506+      b_packed_ = true;
2507+    }
2508   } else {
2509-    b_pack_ptr_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, b_pack_ptr_size_, kWorkspace));
2510+    b_pack_ptr_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeUInt8, b_pack_ptr_size_, kWorkspace));
2511   }
2512   MS_CHECK_PTR(b_pack_ptr_);
2513   return RET_OK;
2514 }
2515
2516 int MatMulFP32BaseCoder::InitMatrixA(const float *src_ptr) {
2517-  ::InitMatrixA(src_ptr, a_pack_ptr_, params_, vec_matmul_);
2518+  ::InitMatrixA(src_ptr, static_cast<float *>(a_pack_ptr_), params_, vec_matmul_);
2519   return RET_OK;
2520 }
2521
2522 int MatMulFP32BaseCoder::InitMatrixB(const float *src_ptr) {
2523-  ::InitMatrixB(src_ptr, b_pack_ptr_, params_, vec_matmul_);
2524+  ::InitMatrixB(src_ptr, static_cast<float *>(b_pack_ptr_), params_, vec_matmul_);
2525   return RET_OK;
2526 }
2527
2528@@ -179,12 +194,11 @@ int MatMulFP32BaseCoder::DoCode(CoderContext *const context) {
2529   NNaclFp32Serializer code, init_code;
2530   size_t w_buf_size = 0;
2531   std::string param_name = "mat_mul_parameter";
2532-
2533+  std::string bias_ptr_str = "((float *)(" + allocator_->GetRuntimeAddr(bias_ptr_) + "))";
2534   code.CodeStruct(param_name, *params_);
2535   if (support_parallel_) {
2536     code << "    " << param_name << ".op_parameter_.thread_num_ = 1;\n";
2537   }
2538-  init_code.CodeStruct("mat_mul_parameter", *params_);
2539   // do bias packing to init
2540   if (input_tensors_.size() == DIMENSION_3D) {
2541     init_code.CodeBufferOffsetExpression(bias_ptr_, context->weight_name(), context->weight_offset_name(),
2542@@ -193,7 +207,6 @@ int MatMulFP32BaseCoder::DoCode(CoderContext *const context) {
2543     int max_bias_data = params_->col_align_;
2544     if (is_bias_broadcast_) {
2545       float broad_cast_data = (reinterpret_cast<float *>(bias_tensor_->data()))[0];
2546-      std::string bias_ptr_str = "((float *)(" + allocator_->GetRuntimeAddr(bias_ptr_) + "))";
2547       init_code << "\t    for (int i = 0; i < " << max_bias_data << "; ++i) {\n";
2548       init_code << "\t\t    " << bias_ptr_str << "[i] = " << broad_cast_data << ";\n";
2549       init_code << "   }\n";
2550@@ -209,44 +222,43 @@ int MatMulFP32BaseCoder::DoCode(CoderContext *const context) {
2551   std::string a_pack_str = allocator_->GetRuntimeAddr(a_pack_ptr_);
2552   std::string b_pack_str = allocator_->GetRuntimeAddr(b_pack_ptr_);
2553   // do const value packing to init
2554-  if (!params_->a_const_) {
2555-    code.CodeFunction("InitMatrixA", input_tensor_, a_pack_ptr_, "&mat_mul_parameter", vec_matmul_);
2556-    if (!params_->b_const_) {
2557-      init_code.CodeMallocExpression(b_pack_ptr_, b_pack_ptr_size_);
2558-      init_code.CodeFunction("memset", b_pack_ptr_, 0, b_pack_ptr_size_);
2559-    } else {
2560-      init_code.CodeBufferOffsetExpression(b_pack_ptr_, context->weight_name(), context->weight_offset_name(),
2561-                                           context->weight_size_name(), b_pack_ptr_size_);
2562-      w_buf_size += b_pack_ptr_size_;
2563-    }
2564-    std::string b_src_str = b_str;
2565-    if (de_quant_flag_) {
2566-      // reuse to b_pack_str
2567-      b_src_str = Dequant::GetInstance()->de_quant_buffer_str();
2568-      std::string de_quant_function = Dequant::GetInstance()->GetMicroDeQuantFunction(filter_tensor_, b_str);
2569-      init_code << de_quant_function;
2570-    }
2571-    // b_pack_str has been memset, no need to memset
2572-    init_code.CodeFunction("InitMatrixB", b_src_str, b_pack_ptr_, "&mat_mul_parameter", vec_matmul_);
2573+  if ((params_->a_const_ && !a_packed_) || (params_->b_const_ && !b_packed_)) {
2574+    init_code.CodeStruct("mat_mul_parameter", *params_);
2575   }
2576-  if (!params_->b_const_) {
2577-    if (!params_->a_const_) {
2578-      init_code.CodeMallocExpression(a_pack_str, a_pack_ptr_size_);
2579-      init_code.CodeFunction("memset", a_pack_ptr_, 0, a_pack_ptr_size_);
2580-    } else {
2581+  if (params_->a_const_) {
2582+    if (!a_packed_) {
2583       init_code.CodeBufferOffsetExpression(a_pack_ptr_, context->weight_name(), context->weight_offset_name(),
2584                                            context->weight_size_name(), a_pack_ptr_size_);
2585       w_buf_size += a_pack_ptr_size_;
2586+      std::string a_src_str = a_str;
2587+      if (de_quant_flag_) {
2588+        // reuse to a_pack_str
2589+        a_src_str = Dequant::GetInstance()->de_quant_buffer_str();
2590+        std::string de_quant_function = Dequant::GetInstance()->GetMicroDeQuantFunction(input_tensor_, a_str);
2591+        init_code << de_quant_function;
2592+      }
2593+      // a_pack_str has been memset, no need to memset
2594+      init_code.CodeFunction("InitMatrixA", a_src_str, a_pack_ptr_, "&mat_mul_parameter", vec_matmul_);
2595     }
2596-    std::string a_src_str = a_str;
2597-    if (de_quant_flag_) {
2598-      // reuse to a_pack_str
2599-      a_src_str = Dequant::GetInstance()->de_quant_buffer_str();
2600-      std::string de_quant_function = Dequant::GetInstance()->GetMicroDeQuantFunction(input_tensor_, a_str);
2601-      init_code << de_quant_function;
2602+  } else {
2603+    code.CodeFunction("InitMatrixA", input_tensor_, a_pack_ptr_, "&mat_mul_parameter", vec_matmul_);
2604+  }
2605+  if (params_->b_const_) {
2606+    if (!b_packed_) {
2607+      init_code.CodeBufferOffsetExpression(b_pack_ptr_, context->weight_name(), context->weight_offset_name(),
2608+                                           context->weight_size_name(), b_pack_ptr_size_);
2609+      w_buf_size += b_pack_ptr_size_;
2610+      std::string b_src_str = b_str;
2611+      if (de_quant_flag_) {
2612+        // reuse to b_pack_str
2613+        b_src_str = Dequant::GetInstance()->de_quant_buffer_str();
2614+        std::string de_quant_function = Dequant::GetInstance()->GetMicroDeQuantFunction(filter_tensor_, b_str);
2615+        init_code << de_quant_function;
2616+      }
2617+      // b_pack_str has been memset, no need to memset
2618+      init_code.CodeFunction("InitMatrixB", b_src_str, b_pack_ptr_, "&mat_mul_parameter", vec_matmul_);
2619     }
2620-    // a_pack_str has been memset, no need to memset
2621-    init_code.CodeFunction("InitMatrixA", a_src_str, a_pack_ptr_, "&mat_mul_parameter", vec_matmul_);
2622+  } else {
2623     code.CodeFunction("InitMatrixB", filter_tensor_, b_pack_ptr_, "&mat_mul_parameter", vec_matmul_);
2624   }
2625   int current_stride_oc = thread_stride_ * col_tile_;
2626@@ -257,18 +269,18 @@ int MatMulFP32BaseCoder::DoCode(CoderContext *const context) {
2627   if (vec_matmul_) {
2628     code << "      const float *batch_a_ptr = " << a_pack_str << " + i * " << params_->deep_ << ";\n";
2629     code << "      const float *batch_b_ptr = " << b_pack_str << " + i * " << params_->deep_ * params_->col_ << ";\n";
2630-    code << "      float *batch_c_ptr = " << c_str << " + i * " << params_->row_ * params_->col_ << ";\n";
2631+    code << "      float *batch_c_ptr = " << c_str << " + i * " << params_->row_ * params_->col_ << ";\n  ";
2632
2633-    code.CodeFunction("MatVecMulFp32", "batch_a_ptr", "batch_b_ptr", "batch_c_ptr", bias_ptr_, params_->act_type_,
2634+    code.CodeFunction("MatVecMulFp32", "batch_a_ptr", "batch_b_ptr", "batch_c_ptr", bias_ptr_str, params_->act_type_,
2635                       params_->deep_, cur_oc);
2636   } else {
2637     code << "      const float *batch_a_ptr = " << a_pack_str << " + i * " << params_->row_align_ * params_->deep_
2638          << ";\n";
2639     code << "      const float *batch_b_ptr = " << b_pack_str << " + i * " << params_->deep_ * params_->col_align_
2640          << ";\n";
2641-    code << "      float *batch_c_ptr = " << c_str << " + i * " << params_->row_ * params_->col_ << ";\n";
2642+    code << "      float *batch_c_ptr = " << c_str << " + i * " << params_->row_ * params_->col_ << ";\n  ";
2643
2644-    code.CodeFunction("MatMulOpt", "batch_a_ptr", "batch_b_ptr", "batch_c_ptr", bias_ptr_, params_->act_type_,
2645+    code.CodeFunction("MatMulOpt", "batch_a_ptr", "batch_b_ptr", "batch_c_ptr", bias_ptr_str, params_->act_type_,
2646                       params_->deep_, params_->row_, cur_oc, params_->col_, "OutType_Nhwc");
2647   }
2648   code << "    }\n";
2649diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/matmul_fp32_base_coder.h b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/matmul_fp32_base_coder.h
2650index 4f35254d..68b2658a 100644
2651--- a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/matmul_fp32_base_coder.h
2652+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/matmul_fp32_base_coder.h
2653@@ -36,30 +36,30 @@ class MatMulFP32BaseCoder : public OperatorCoder {
2654
2655   virtual int ReSize();
2656
2657+ protected:
2658+  virtual void ResizeParameter();
2659+  virtual int InitBiasData();
2660+  virtual int InitBufferA();
2661+  virtual int InitBufferB();
2662+  virtual int CollectFilesForTarget(CoderContext *const context);
2663+  virtual int Init();
2664+  virtual void InitParameter();
2665+
2666  private:
2667-  void ResizeParameter();
2668-  int InitBiasData();
2669-  int InitBufferA();
2670-  int InitBufferB();
2671   int InitMatrixA(const float *src_ptr);
2672   int InitMatrixB(const float *src_ptr);
2673-  int CollectFilesForTarget(CoderContext *const context);
2674-
2675- protected:
2676-  virtual int Init();
2677-  void InitParameter();
2678
2679  protected:
2680   Tensor *filter_tensor_{nullptr};
2681   Tensor *bias_tensor_{nullptr};
2682   MatMulParameter *params_{nullptr};
2683-  float *a_pack_ptr_ = nullptr;
2684-  float *b_pack_ptr_ = nullptr;
2685-  float *bias_ptr_{nullptr};
2686+  void *a_pack_ptr_ = nullptr;
2687+  void *b_pack_ptr_ = nullptr;
2688+  void *bias_ptr_{nullptr};
2689   bool vec_matmul_{false};
2690   bool de_quant_flag_{false};
2691-
2692- private:
2693+  bool a_packed_{false};
2694+  bool b_packed_{false};
2695   int col_tile_{0};
2696   int row_tile_{0};
2697   int thread_stride_{0};
2698@@ -69,6 +69,7 @@ class MatMulFP32BaseCoder : public OperatorCoder {
2699   size_t a_pack_ptr_size_{0};
2700   size_t b_pack_ptr_size_{0};
2701   bool is_bias_broadcast_{false};
2702+  size_t data_type_size_{C4NUM};
2703 };
2704 }  // namespace mindspore::lite::micro::nnacl
2705 #endif  // MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP32_MATMUL_FP32_BASE_CODER_H_
2706diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/pooling_fp32_coder.h b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/pooling_fp32_coder.h
2707index df08dcbe..9f4e0026 100644
2708--- a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/pooling_fp32_coder.h
2709+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/pooling_fp32_coder.h
2710@@ -21,7 +21,7 @@
2711 #include "coder/opcoders/op_coder.h"
2712
2713 namespace mindspore::lite::micro::nnacl {
2714-class PoolingFP32Coder final : public OperatorCoder {
2715+class PoolingFP32Coder : public OperatorCoder {
2716  public:
2717   PoolingFP32Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
2718                    const LiteGraph::Node *node, size_t node_index, Target target)
2719diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/reduce_fp32_coder.cc b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/reduce_fp32_coder.cc
2720index 661881af..11e8a3ec 100644
2721--- a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/reduce_fp32_coder.cc
2722+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/reduce_fp32_coder.cc
2723@@ -16,7 +16,6 @@
2724
2725 #include "coder/opcoders/nnacl/fp32/reduce_fp32_coder.h"
2726 #include <string>
2727-#include "coder/log.h"
2728 #include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"
2729 #include "coder/opcoders/file_collector.h"
2730
2731@@ -25,14 +24,14 @@ namespace mindspore::lite::micro::nnacl {
2732 int ReduceFP32Coder::Prepare(CoderContext *const context) {
2733   MS_CHECK_RET_CODE(ReduceBaseCoder::Init(), "init failed");
2734   MS_CHECK_RET_CODE(ReSize(), "resize failed");
2735-  MS_CHECK_RET_CODE(MallocTmpBuffer(), "malloc buffer failed");
2736+  MS_CHECK_RET_CODE(MallocTmpBuffer(kNumberTypeFloat32), "malloc buffer failed");
2737   return RET_OK;
2738 }
2739
2740-int ReduceFP32Coder::MallocTmpBuffer() {
2741+int ReduceFP32Coder::MallocTmpBuffer(mindspore::TypeId type_id) {
2742   data_buffers_.clear();
2743   for (auto size : buffer_sizes_) {
2744-    auto *buffer = static_cast<float *>(allocator_->Malloc(kNumberTypeFloat, size * sizeof(float), kWorkspace));
2745+    auto *buffer = static_cast<float *>(allocator_->Malloc(type_id, size * lite::DataTypeSize(type_id), kWorkspace));
2746     MS_CHECK_PTR(buffer);
2747     data_buffers_.emplace_back(buffer);
2748   }
2749@@ -57,41 +56,42 @@ int ReduceFP32Coder::DoCode(CoderContext *const context) {
2750             "reduce_fp32.c",
2751           });
2752
2753-  NNaclFp32Serializer code;
2754   // call the op function
2755-  std::string reduce;
2756-  std::string int_reduce;
2757   switch (mode_) {
2758     case static_cast<int>(schema::ReduceMode_ReduceSum): {
2759-      reduce = "ReduceSum";
2760+      reduce_ = "ReduceSum";
2761       break;
2762     }
2763     case static_cast<int>(schema::ReduceMode_ReduceMean): {
2764-      reduce = "ReduceMean";
2765+      reduce_ = "ReduceMean";
2766       break;
2767     }
2768     case static_cast<int>(schema::ReduceMode_ReduceMax): {
2769-      reduce = "ReduceMax";
2770+      reduce_ = "ReduceMax";
2771       break;
2772     }
2773     case static_cast<int>(schema::ReduceMode_ReduceMin): {
2774-      reduce = "ReduceMin";
2775+      reduce_ = "ReduceMin";
2776       break;
2777     }
2778     case static_cast<int>(schema::ReduceMode_ReduceProd): {
2779-      reduce = "ReduceProd";
2780-      int_reduce = "IntReduceProd";
2781+      reduce_ = "ReduceProd";
2782+      int_reduce_ = "IntReduceProd";
2783       break;
2784     }
2785     case static_cast<int>(schema::ReduceMode_ReduceSumSquare): {
2786-      reduce = "ReduceSumSquare";
2787+      reduce_ = "ReduceSumSquare";
2788       break;
2789     }
2790     default:
2791-      MS_LOG(ERROR) << "Reduce unsupported reduce mode: " << mode_;
2792+      MS_LOG(ERROR) << "Reduce unsupported reduce mode: " << mode_;
2793       return RET_ERROR;
2794   }
2795-
2796+  GenerateCode(context);
2797+  return RET_OK;
2798+}
2799+void ReduceFP32Coder::GenerateCode(CoderContext *const context) {
2800+  NNaclFp32Serializer code;
2801   std::string src_addr = allocator_->GetRuntimeAddr(input_tensor_);
2802   std::string dst_addr;
2803   for (int i = 0; i < num_axes_; ++i) {
2804@@ -103,16 +103,16 @@ int ReduceFP32Coder::DoCode(CoderContext *const context) {
2805     outer_size_ = outer_sizes_.at(i);
2806     inner_size_ = inner_sizes_.at(i);
2807     axis_size_ = axis_sizes_.at(i);
2808-    if (data_type_ == ::kNumberTypeFloat32) {
2809-      code.CodeFunction(reduce, outer_size_, inner_size_, axis_size_, src_addr, dst_addr, 0, thread_num_);
2810+    if (data_type_ == ::kNumberTypeInt32) {
2811+      code.CodeFunction(int_reduce_, outer_size_, inner_size_, axis_size_, src_addr, dst_addr, 0, thread_num_);
2812     } else {
2813-      code.CodeFunction(int_reduce, outer_size_, inner_size_, axis_size_, src_addr, dst_addr, 0, thread_num_);
2814+      code.CodeFunction(reduce_, outer_size_, inner_size_, axis_size_, src_addr, dst_addr, 0, thread_num_);
2815     }
2816     src_addr = dst_addr;
2817   }
2818   context->AppendCode(code.str());
2819-  return RET_OK;
2820 }
2821
2822 REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_ReduceFusion, CPUOpCoderCreator<ReduceFP32Coder>)
2823+REG_OPERATOR_CODER(kAllTargets, kNumberTypeInt32, PrimitiveType_ReduceFusion, CPUOpCoderCreator<ReduceFP32Coder>)
2824 }  // namespace mindspore::lite::micro::nnacl
2825diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/reduce_fp32_coder.h b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/reduce_fp32_coder.h
2826index a62f35ec..5b9ccd2b 100644
2827--- a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/reduce_fp32_coder.h
2828+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/reduce_fp32_coder.h
2829@@ -23,7 +23,7 @@
2830 #include "coder/opcoders/op_coder.h"
2831
2832 namespace mindspore::lite::micro::nnacl {
2833-class ReduceFP32Coder final : public ReduceBaseCoder {
2834+class ReduceFP32Coder : public ReduceBaseCoder {
2835  public:
2836   ReduceFP32Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
2837                   const LiteGraph::Node *node, size_t node_index, Target target)
2838@@ -35,11 +35,17 @@ class ReduceFP32Coder final : public ReduceBaseCoder {
2839
2840   int DoCode(CoderContext *const context) override;
2841
2842- private:
2843-  int ReSize() override;
2844-  int MallocTmpBuffer();
2845+ protected:
2846+  void GenerateCode(CoderContext *const context);
2847+  int MallocTmpBuffer(mindspore::TypeId type_id);
2848+
2849+  std::string reduce_;
2850+  std::string int_reduce_;
2851   TypeIdC data_type_{::kNumberTypeFloat32};
2852   std::vector<float *> data_buffers_;
2853+
2854+ private:
2855+  int ReSize() override;
2856 };
2857 }  // namespace mindspore::lite::micro::nnacl
2858 #endif  // MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP32_REDUCE_FP32_CODER_H_
2859diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/resize_fp32_coder.cc b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/resize_fp32_coder.cc
2860index a5acf689..d84d0c60 100644
2861--- a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/resize_fp32_coder.cc
2862+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/resize_fp32_coder.cc
2863@@ -68,7 +68,9 @@ int ResizeFP32Coder::ReSize() {
2864   }
2865
2866   MS_CHECK_RET_CODE_WITH_EXE(MallocTmpBuffer(), "MallocTmpBuffer failed", FreeTmpBuffer());
2867-  MS_CHECK_RET_CODE_WITH_EXE(ResizePrepare(), "ResizePrepare failed", FreeTmpBuffer());
2868+  if (input_tensor_->data_type() == kNumberTypeFloat32 || input_tensor_->data_type() == kNumberTypeFloat) {
2869+    MS_CHECK_RET_CODE_WITH_EXE(ResizePrepare(), "ResizePrepare failed", FreeTmpBuffer());
2870+  }
2871
2872   return RET_OK;
2873 }
2874@@ -128,8 +130,8 @@ int ResizeFP32Coder::MallocTmpBuffer() {
2875   }
2876
2877   {
2878-    size_t line_buffer_size = sizeof(float) * x_len_ * input_tensor_->Channel() * kTwo * kMaxThreadNumSupported;
2879-    line_buffer_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, line_buffer_size, kWorkspace));
2880+    size_t line_buffer_size = DataTypeLen() * x_len_ * input_tensor_->Channel() * kTwo * kMaxThreadNumSupported;
2881+    line_buffer_ = allocator_->Malloc(kNumberTypeUInt8, line_buffer_size, kWorkspace);
2882     CHECK_MALLOC_RES(line_buffer_, RET_NULL_PTR);
2883   }
2884   return RET_OK;
2885@@ -168,12 +170,12 @@ int ResizeFP32Coder::DoCode(CoderContext *const context) {
2886
2887   switch (method_) {
2888     case static_cast<int>(schema::ResizeMethod_LINEAR): {
2889-      code.CodeArray("y_bottoms", coordinate_.y_bottoms_, sizeof(int) * y_len_, true);
2890-      code.CodeArray("y_tops", coordinate_.y_tops_, sizeof(int) * y_len_, true);
2891-      code.CodeArray("x_lefts", coordinate_.x_lefts_, sizeof(int) * x_len_, true);
2892-      code.CodeArray("x_rights", coordinate_.x_rights_, sizeof(int) * x_len_, true);
2893-      code.CodeArray("y_weights", y_weights_, sizeof(float) * y_weight_len_, true);
2894-      code.CodeArray("x_weights", x_weights_, sizeof(float) * x_weight_len_, true);
2895+      code.CodeArray("y_bottoms", coordinate_.y_bottoms_, y_len_, true);
2896+      code.CodeArray("y_tops", coordinate_.y_tops_, y_len_, true);
2897+      code.CodeArray("x_lefts", coordinate_.x_lefts_, x_len_, true);
2898+      code.CodeArray("x_rights", coordinate_.x_rights_, x_len_, true);
2899+      code.CodeArray("y_weights", y_weights_, y_weight_len_, true);
2900+      code.CodeArray("x_weights", x_weights_, x_weight_len_, true);
2901
2902       int c = input_tensor_->shape().at(kNHWC_C);
2903       code << "float *line0 = " << MemoryAllocator::GetInstance()->GetRuntimeAddr(line_buffer_) << ";\n";
2904@@ -188,12 +190,13 @@ int ResizeFP32Coder::DoCode(CoderContext *const context) {
2905       break;
2906     }
2907     case static_cast<int>(schema::ResizeMethod_CUBIC): {
2908-      code.CodeArray("y_tops", coordinate_.y_tops_, sizeof(int) * y_len_, true);
2909-      code.CodeArray("x_lefts", coordinate_.x_lefts_, sizeof(int) * x_len_, true);
2910-      code.CodeArray("y_weights", y_weights_, sizeof(float) * y_weight_len_, true);
2911-      code.CodeArray("x_weights", x_weights_, sizeof(float) * x_weight_len_, true);
2912+      code.CodeArray("y_tops", coordinate_.y_tops_, y_len_, true);
2913+      code.CodeArray("x_lefts", coordinate_.x_lefts_, x_len_, true);
2914+      code.CodeArray("y_weights", y_weights_, y_weight_len_, true);
2915+      code.CodeArray("x_weights", x_weights_, x_weight_len_, true);
2916+      auto buffer_str = "(float *)" + MemoryAllocator::GetInstance()->GetRuntimeAddr(line_buffer_);
2917       code.CodeFunction("ResizeBicubic", input_tensor_, output_tensor_, "input_shape", "output_shape", "y_tops",
2918-                        "x_lefts", "y_weights", "x_weights", line_buffer_, 0, new_height_);
2919+                        "x_lefts", "y_weights", "x_weights", buffer_str, 0, new_height_);
2920       break;
2921     }
2922     default: {
2923diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/resize_fp32_coder.h b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/resize_fp32_coder.h
2924index 54594c62..34dffd50 100644
2925--- a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/resize_fp32_coder.h
2926+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/resize_fp32_coder.h
2927@@ -27,7 +27,7 @@
2928 #include "src/runtime/kernel/cpu/fp32/resize_fp32.h"
2929
2930 namespace mindspore::lite::micro::nnacl {
2931-class ResizeFP32Coder final : public ResizeBaseCoder {
2932+class ResizeFP32Coder : public ResizeBaseCoder {
2933  public:
2934   ResizeFP32Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
2935                   const LiteGraph::Node *node, size_t node_index, Target target)
2936@@ -37,12 +37,12 @@ class ResizeFP32Coder final : public ResizeBaseCoder {
2937   int ReSize();
2938   int DoCode(CoderContext *const context) override;
2939
2940- private:
2941+ protected:
2942   int SelectCalculatorFunc();
2943   void CalTmpBufferLen();
2944   int MallocTmpBuffer();
2945   void FreeTmpBuffer();
2946-  int ResizePrepare();
2947+  virtual int DataTypeLen() { return sizeof(float); }
2948
2949   ResizeCoordinate coordinate_;
2950   size_t x_len_{0};
2951@@ -52,9 +52,12 @@ class ResizeFP32Coder final : public ResizeBaseCoder {
2952
2953   float *y_weights_{nullptr};
2954   float *x_weights_{nullptr};
2955-  float *line_buffer_{nullptr};
2956+  void *line_buffer_{nullptr};
2957   CalculateOriginalCoordinate calculate_{nullptr};
2958   std::string calculate_str_;
2959+
2960+ private:
2961+  int ResizePrepare();
2962 };
2963 }  // namespace mindspore::lite::micro::nnacl
2964 #endif  // MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP32_RESIZE_FP32_CODER_H_
2965diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/scale_fp32_coder.cc b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/scale_fp32_coder.cc
2966index ae28fe2a..9375a71a 100644
2967--- a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/scale_fp32_coder.cc
2968+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/scale_fp32_coder.cc
2969@@ -28,36 +28,14 @@ int ScaleFP32Coder::InitScaleOffset() {
2970   MS_CHECK_PTR(scale_tensor);
2971   if (scale_tensor->data() != nullptr) {
2972     scale_param_->const_scale_ = true;
2973-    scale_ =
2974-      reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, scale_tensor->Size(), kOfflinePackWeight));
2975-    MS_CHECK_PTR(scale_);
2976-    MS_CHECK_TRUE(scale_tensor->Size() > 0, "invalid scale tensor size");
2977-    MS_CHECK_RET_CODE(memcpy_s(scale_, scale_tensor->Size(), scale_tensor->data(), scale_tensor->Size()),
2978-                      "memcpy scale failed");
2979   } else {
2980     scale_param_->const_scale_ = false;
2981-    scale_ = nullptr;
2982   }
2983
2984-  if (input_tensors_.size() == DIMENSION_2D) {
2985+  if (input_tensors_.size() == DIMENSION_3D && input_tensors_.at(kBiasIndex)->data() != nullptr) {
2986     scale_param_->const_offset_ = true;
2987-    offset_ =
2988-      reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, scale_tensor->Size(), kOfflinePackWeight));
2989-    MS_CHECK_PTR(offset_);
2990-    MS_CHECK_RET_CODE(memset_s(offset_, scale_tensor->Size(), 0, scale_tensor->Size()), "memset_s failed!");
2991-  } else if (input_tensors_.size() == DIMENSION_3D && input_tensors_.at(kBiasIndex)->data() != nullptr) {
2992-    scale_param_->const_offset_ = true;
2993-    Tensor *offset_tensor = input_tensors_.at(kBiasIndex);
2994-    MS_CHECK_PTR(offset_tensor);
2995-    offset_ =
2996-      reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, offset_tensor->Size(), kOfflinePackWeight));
2997-    MS_CHECK_PTR(offset_);
2998-    MS_CHECK_TRUE(offset_tensor->Size() > 0, "invalid offset tensor size");
2999-    MS_CHECK_RET_CODE(memcpy_s(offset_, offset_tensor->Size(), offset_tensor->data(), offset_tensor->Size()),
3000-                      "memcpy_s failed!");
3001   } else {
3002     scale_param_->const_offset_ = false;
3003-    offset_ = nullptr;
3004   }
3005   return RET_OK;
3006 }
3007@@ -125,25 +103,29 @@ int ScaleFP32Coder::DoCode(CoderContext *const context) {
3008
3009   NNaclFp32Serializer code;
3010   code.CodeStruct("scale_parameter", *scale_param_);
3011-
3012+  auto scale = allocator_->GetRuntimeAddr(input_tensors_.at(kWeightIndex), scale_param_->const_scale_);
3013+  std::string offset{"NULL"};
3014+  if (input_tensors_.size() == DIMENSION_3D) {
3015+    offset = allocator_->GetRuntimeAddr(input_tensors_.at(kBiasIndex), scale_param_->const_offset_);
3016+  }
3017   switch (scale_param_->activation_type_) {
3018     case schema::ActivationType_RELU6:
3019-      code.CodeFunction("DoScaleRelu6", input_tensor_, output_tensor_, scale_, offset_, kDefaultTaskId,
3020+      code.CodeFunction("DoScaleRelu6", input_tensor_, output_tensor_, scale, offset, kDefaultTaskId,
3021                         "&scale_parameter");
3022       break;
3023     case schema::ActivationType_RELU: {
3024       if (!support_parallel_) {
3025-        code.CodeFunction("DoScaleRelu", input_tensor_, output_tensor_, scale_, offset_, kDefaultTaskId,
3026+        code.CodeFunction("DoScaleRelu", input_tensor_, output_tensor_, scale, offset, kDefaultTaskId,
3027                           "&scale_parameter");
3028       } else {
3029-        code.CodeBaseStruct("ScaleFp32Args", kRunArgs, input_tensor_, output_tensor_, scale_, offset_,
3030+        code.CodeBaseStruct("ScaleFp32Args", kRunArgs, input_tensor_, output_tensor_, scale, offset,
3031                             "&scale_parameter");
3032         code.CodeFunction(kParallelLaunch, "DoScaleReluRun", kRunArgsAddr, "scale_parameter.op_parameter_.thread_num_");
3033       }
3034       break;
3035     }
3036     case schema::ActivationType_NO_ACTIVATION:
3037-      code.CodeFunction("DoScale", input_tensor_, output_tensor_, scale_, offset_, kDefaultTaskId, "&scale_parameter");
3038+      code.CodeFunction("DoScale", input_tensor_, output_tensor_, scale, offset, kDefaultTaskId, "&scale_parameter");
3039       break;
3040     default:
3041       MS_LOG(ERROR) << "Scale does not support activation type " << scale_param_->activation_type_;
3042diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/scale_fp32_coder.h b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/scale_fp32_coder.h
3043index 7f8e6242..319ad35a 100644
3044--- a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/scale_fp32_coder.h
3045+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/scale_fp32_coder.h
3046@@ -39,8 +39,6 @@ class ScaleFP32Coder final : public OperatorCoder {
3047
3048  private:
3049   ScaleParameter *scale_param_{nullptr};
3050-  float *scale_{nullptr};
3051-  float *offset_{nullptr};
3052 };
3053 }  // namespace mindspore::lite::micro::nnacl
3054 #endif  // MICRO_CODER_OPCODERS_FP32__CODER_H_
3055diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/split_fp32_coder.cc b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/split_fp32_coder.cc
3056new file mode 100644
3057index 00000000..6f817386
3058--- /dev/null
3059+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/split_fp32_coder.cc
3060@@ -0,0 +1,77 @@
3061+/**
3062+ * Copyright 2022 Huawei Technologies Co., Ltd
3063+ *
3064+ * Licensed under the Apache License, Version 2.0 (the "License");
3065+ * you may not use this file except in compliance with the License.
3066+ * You may obtain a copy of the License at
3067+ *
3068+ * http://www.apache.org/licenses/LICENSE-2.0
3069+ *
3070+ * Unless required by applicable law or agreed to in writing, software
3071+ * distributed under the License is distributed on an "AS IS" BASIS,
3072+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
3073+ * See the License for the specific language governing permissions and
3074+ * limitations under the License.
3075+ */
3076+#include "coder/opcoders/nnacl/fp32/split_fp32_coder.h"
3077+#include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"
3078+#include "coder/opcoders/file_collector.h"
3079+#include "coder/opcoders/parallel.h"
3080+#include "src/runtime/kernel/cpu/base/split_base.h"
3081+
3082+using mindspore::schema::PrimitiveType_Split;
3083+
3084+namespace mindspore::lite::micro::nnacl {
3085+int SplitFP32Coder::Prepare(CoderContext *const context) {
3086+  auto status = mindspore::kernel::SplitBaseCPUKernel::CheckAndInitSplitParam(
3087+    *input_tensor_, reinterpret_cast<SplitParameter *>(parameter_));
3088+  if (RET_OK != status) {
3089+    MS_LOG(ERROR) << "CheckAndInitSplitParam failed";
3090+    return status;
3091+  }
3092+  return RET_OK;
3093+}
3094+
3095+int SplitFP32Coder::DoCode(CoderContext *const context) {
3096+  Collect(context, {"nnacl/base/split_base.h"}, {"split_base.c"});
3097+  if (support_parallel_) {
3098+    Collect(context, {"wrapper/fp32/split_fp32_wrapper.h"}, {"split_fp32_wrapper.c"});
3099+  }
3100+  auto param = reinterpret_cast<SplitParameter *>(parameter_);
3101+  int num_unit = param->split_count_ * param->num_split_;
3102+
3103+  NNaclFp32Serializer code;
3104+  code << "    void *output_ptrs[" << output_tensors_.size() << "] = {";
3105+  for (int i = 0; i < param->num_split_; i++) {
3106+    code << allocator_->GetRuntimeAddr(output_tensors_.at(i)) << ",";
3107+  }
3108+  code << "};\n";
3109+  code << "    int input_dim[" << input_tensor_->shape().size() << "] = {";
3110+  for (auto &dim : input_tensor_->shape()) {
3111+    code << dim << ",";
3112+  }
3113+  code << "};\n";
3114+  code << "    int split_sizes[" << param->num_split_ << "] = {";
3115+  for (int i = 0; i < param->num_split_; i++) {
3116+    code << param->split_sizes_[i] << ",";
3117+  }
3118+  code << "};\n";
3119+
3120+  code.CodeStruct("split_param", *param);
3121+  if (!support_parallel_) {
3122+    code.CodeFunction("DoSplit", input_tensor_, "(void *)output_ptrs", "input_dim", "0", num_unit, "&split_param",
3123+                      lite::DataTypeSize(input_tensor_->data_type()));
3124+  } else {
3125+    code.CodeBaseStruct("SplitFp32Args", kRunArgs, input_tensor_, "(void *)output_ptrs", "input_dim", num_unit,
3126+                        lite::DataTypeSize(input_tensor_->data_type()), "&split_param");
3127+    code.CodeFunction(kParallelLaunch, "DoSplitRun", kRunArgsAddr, "split_param.op_parameter_.thread_num_");
3128+  }
3129+
3130+  context->AppendCode(code.str());
3131+  return RET_OK;
3132+}
3133+
// Register one coder for all targets and three element types: DoCode sizes the
// generated copies via DataTypeSize, so the element type only changes the byte
// width, not the code path.
REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_Split, CPUOpCoderCreator<SplitFP32Coder>)
REG_OPERATOR_CODER(kAllTargets, kNumberTypeInt32, PrimitiveType_Split, CPUOpCoderCreator<SplitFP32Coder>)
REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat16, PrimitiveType_Split, CPUOpCoderCreator<SplitFP32Coder>)
}  // namespace mindspore::lite::micro::nnacl
3138diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/split_fp32_coder.h b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/split_fp32_coder.h
3139new file mode 100644
3140index 00000000..f65214c1
3141--- /dev/null
3142+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/split_fp32_coder.h
3143@@ -0,0 +1,37 @@
3144+/**
3145+ * Copyright 2022 Huawei Technologies Co., Ltd
3146+ *
3147+ * Licensed under the Apache License, Version 2.0 (the "License");
3148+ * you may not use this file except in compliance with the License.
3149+ * You may obtain a copy of the License at
3150+ *
3151+ * http://www.apache.org/licenses/LICENSE-2.0
3152+ *
3153+ * Unless required by applicable law or agreed to in writing, software
3154+ * distributed under the License is distributed on an "AS IS" BASIS,
3155+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
3156+ * See the License for the specific language governing permissions and
3157+ * limitations under the License.
3158+ */
3159+
3160+#ifndef MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP32_SPLIT_FP32_CODER_H_
3161+#define MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP32_SPLIT_FP32_CODER_H_
3162+
3163+#include <vector>
3164+#include "coder/opcoders/op_coder.h"
3165+#include "nnacl/split_parameter.h"
3166+
3167+namespace mindspore::lite::micro::nnacl {
// Code generator for the Split operator: emits C source that partitions the
// input tensor into the configured output tensors. Registered for fp32,
// int32 and fp16 inputs (see split_fp32_coder.cc).
class SplitFP32Coder : public OperatorCoder {
 public:
  SplitFP32Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
                 const LiteGraph::Node *node, size_t node_index, Target target)
      : OperatorCoder(in_tensors, out_tensors, node, node_index, target) {}
  ~SplitFP32Coder() override = default;

  // Validates the split configuration against the input tensor and fills in
  // the derived fields of SplitParameter; runs once before code generation.
  int Prepare(CoderContext *const context) override;

  // Emits the runtime C code that performs the split.
  int DoCode(CoderContext *const context) override;
};
3179+}  // namespace mindspore::lite::micro::nnacl
3180+#endif  // MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP32_SPLIT_FP32_CODER_H_
3181diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/transpose_fp32_coder.cc b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/transpose_fp32_coder.cc
3182index 2512c9a7..a5882722 100644
3183--- a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/transpose_fp32_coder.cc
3184+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/transpose_fp32_coder.cc
3185@@ -48,10 +48,6 @@ int TransposeFp32Coder::Resize() {
3186     param_->out_strides_[i] = out_shape.at(i + 1) * param_->out_strides_[i + 1];
3187   }
3188
3189-  out_shape_ =
3190-    reinterpret_cast<int *>(allocator_->Malloc(kNumberTypeInt32, out_shape.size() * sizeof(int), kOfflinePackWeight));
3191-  MS_CHECK_PTR(out_shape_);
3192-  memcpy(out_shape_, out_shape.data(), in_shape.size() * sizeof(int));
3193   return RET_OK;
3194 }
3195
3196@@ -141,7 +137,9 @@ int TransposeFp32Coder::DoCode(CoderContext *const context) {
3197   }
3198
3199   code.CodeStruct("trans_param", *param_);
3200-  dims_ = output_tensor_->shape().size();
3201+  auto out_shape = output_tensor_->shape();
3202+  dims_ = static_cast<int>(out_shape.size());
3203+  code.CodeArray("output_shape", out_shape.data(), dims_, true);
3204   if (dims_ > MAX_TRANSPOSE_DIM_SIZE) {
3205     int *dim_size = reinterpret_cast<int *>(malloc(dims_ * sizeof(int)));
3206     if (dim_size == nullptr) {
3207@@ -149,7 +147,7 @@ int TransposeFp32Coder::DoCode(CoderContext *const context) {
3208     }
3209     *(dim_size + dims_ - 1) = 1;
3210     for (int i = dims_ - 1; i > 0; --i) {
3211-      *(dim_size + i - 1) = *(dim_size + i) * out_shape_[i];
3212+      *(dim_size + i - 1) = *(dim_size + i) * out_shape[i];
3213     }
3214     code.CodeArray("dim_size", dim_size, dims_);
3215     int *position = reinterpret_cast<int *>(malloc(dims_ * thread_num_ * sizeof(int)));
3216@@ -158,12 +156,12 @@ int TransposeFp32Coder::DoCode(CoderContext *const context) {
3217       return RET_NULL_PTR;
3218     }
3219     code.CodeArray("position", position, dims_ * thread_num_);
3220-    code.CodeFunction("TransposeDimsFp32", input_tensor_, output_tensor_, out_shape_, "dim_size", "position",
3221+    code.CodeFunction("TransposeDimsFp32", input_tensor_, output_tensor_, "output_shape", "dim_size", "position",
3222                       "&trans_param", kDefaultTaskId, thread_num_);
3223     free(dim_size);
3224     free(position);
3225   } else {
3226-    code.CodeFunction("DoTransposeFp32", input_tensor_, output_tensor_, out_shape_, "&trans_param");
3227+    code.CodeFunction("DoTransposeFp32", input_tensor_, output_tensor_, "output_shape", "&trans_param");
3228   }
3229   context->AppendCode(code.str());
3230   return RET_OK;
3231diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/transpose_fp32_coder.h b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/transpose_fp32_coder.h
3232index c4cd37a9..0fdbb407 100644
3233--- a/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/transpose_fp32_coder.h
3234+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/nnacl/fp32/transpose_fp32_coder.h
3235@@ -21,7 +21,7 @@
3236 #include "coder/opcoders/op_coder.h"
3237 #include "nnacl/transpose.h"
3238 namespace mindspore::lite::micro::nnacl {
3239-class TransposeFp32Coder final : public OperatorCoder {
3240+class TransposeFp32Coder : public OperatorCoder {
3241  public:
3242   TransposeFp32Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
3243                      const LiteGraph::Node *node, size_t node_index, Target target)
3244@@ -33,18 +33,18 @@ class TransposeFp32Coder final : public OperatorCoder {
3245
3246   int DoCode(CoderContext *const context) override;
3247
3248-  int Resize();
3249+  virtual int Resize();
3250
3251   int Init();
3252
3253+ protected:
3254+  TransposeParameter *param_{nullptr};
3255+  int dims_{0};
3256+
3257  private:
3258   void GetNHNCTransposeFunc();
3259-
3260-  TransposeParameter *param_{nullptr};
3261-  int *out_shape_{nullptr};
3262   std::string NHNCTransposeFunc_;
3263   int nhnc_param_[3];
3264-  int dims_{0};
3265 };
3266 }  // namespace mindspore::lite::micro::nnacl
3267 #endif  // MINDSPORE_LITE_TOOLS_CONVERTER_MICRO_CODER_OPCODERS_NNACL_FP32_TRANSPOSE_FP32_CODER_H_
3268diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.cc b/mindspore/lite/tools/converter/micro/coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.cc
3269index 49314886..c333b621 100644
3270--- a/mindspore/lite/tools/converter/micro/coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.cc
3271+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.cc
3272@@ -177,6 +177,25 @@ void NNaclFp32Serializer::CodeStruct(const std::string &name, const OpParameter
3273                         op_param.is_train_session_, op_param.is_zero_shape_);
3274 }
3275
// Emits a SplitParameter struct initializer into the generated C source.
// The argument order must match the field order of SplitParameter exactly.
// "split_sizes" references a local int array the op coder emits beforehand
// (see SplitFP32Coder::DoCode); "{0}" zero-initializes the following array
// field — presumably a backup-sizes buffer, confirm against
// nnacl/split_parameter.h.
void NNaclFp32Serializer::CodeStruct(const std::string &name, const SplitParameter &split_parameter) {
  CodeBaseStruct("SplitParameter", name, split_parameter.op_parameter_, split_parameter.num_split_, "split_sizes",
                 split_parameter.split_dim_, ToString(split_parameter.strides_), "{0}", split_parameter.n_dims_,
                 split_parameter.split_count_);
}
3281+
// Emits a LayerNormParameter struct initializer into the generated C source.
// The argument order must match the struct's declaration order — CodeBaseStruct
// presumably emits a positional initializer list (confirm in the serializer
// base). The <false> template flag mirrors the other parameter overloads here.
void NNaclFp32Serializer::CodeStruct(const std::string &name, const LayerNormParameter &op_param) {
  CodeBaseStruct<false>("LayerNormParameter", name, op_param.op_parameter_, op_param.epsilon_,
                        op_param.elementwise_mode_, op_param.elementwise_affine_, op_param.begin_norm_axis_,
                        op_param.begin_params_axis_, op_param.norm_inner_size_, op_param.norm_outer_size_,
                        op_param.params_inner_size_, op_param.params_outer_size_, op_param.normalized_dims_,
                        ToString(op_param.normalized_shape_), op_param.thread_count_, op_param.thread_outsize_);
}
3289+
// Emits a BroadcastShapeInfo struct initializer into the generated C source.
// Argument order must match the struct's field order: input shape + size,
// then output shape + size.
void NNaclFp32Serializer::CodeStruct(const std::string &name, const BroadcastShapeInfo &op_param) {
  CodeBaseStruct<false>("BroadcastShapeInfo", name, ToString(op_param.input_shape_), op_param.input_shape_size_,
                        ToString(op_param.output_shape_), op_param.output_shape_size_);
}
3294+
3295 void NNaclFp32Serializer::CodeArrayStruct(const std::string &name, TensorC *tensorC, std::vector<Tensor *> tensor) {
3296   std::vector<std::string> tensor_names;
3297   int size = tensor.size();
3298diff --git a/mindspore/lite/tools/converter/micro/coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h b/mindspore/lite/tools/converter/micro/coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h
3299index 8e1350f5..f52ced20 100644
3300--- a/mindspore/lite/tools/converter/micro/coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h
3301+++ b/mindspore/lite/tools/converter/micro/coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h
3302@@ -41,6 +41,9 @@
3303 #include "wrapper/fp32/arithmetic_fp32_wrapper.h"
3304 #include "wrapper/base/affine_wrapper.h"
3305 #include "wrapper/fp32/conv_winograd_fp32_wrapper.h"
3306+#include "nnacl/layer_norm_parameter.h"
3307+#include "nnacl/broadcast_to_parameter.h"
3308+#include "nnacl/split_parameter.h"
3309
3310 namespace mindspore::lite::micro::nnacl {
3311 class NNaclFp32Serializer : public Serializer {
3312@@ -68,6 +71,9 @@ class NNaclFp32Serializer : public Serializer {
3313   void CodeStruct(const std::string &name, const GroupNormParameter &gn_param);
3314   void CodeStruct(const std::string &name, const ActivationParameter &activation_parameter);
3315   void CodeStruct(const std::string &name, const OpParameter &op_param);
3316+  void CodeStruct(const std::string &name, const SplitParameter &split_parameter);
3317+  void CodeStruct(const std::string &name, const LayerNormParameter &param);
3318+  void CodeStruct(const std::string &name, const BroadcastShapeInfo &param);
3319   void CodeArrayStruct(const std::string &name, TensorC *tensorC, std::vector<Tensor *> tensor);
3320
3321  private:
3322diff --git a/mindspore/lite/tools/converter/micro/coder/session.cc b/mindspore/lite/tools/converter/micro/coder/session.cc
3323index 10dc2dc6..471f1491 100644
3324--- a/mindspore/lite/tools/converter/micro/coder/session.cc
3325+++ b/mindspore/lite/tools/converter/micro/coder/session.cc
3326@@ -119,12 +119,13 @@ int CoderSession::GenerateCode() {
3327   return ret;
3328 }
3329
3330-int CoderSession::Init(const void *content, int size) {
int CoderSession::Init(const void *content, int size, bool enableFp16) {
  // Deserializes the model buffer and sets up the coder graph and context.
  // content/size: serialized model flatbuffer and its byte length.
  // enableFp16: stored and later forwarded to tensor conversion (see
  // InitCodeGraph).
  MS_LOG(INFO) << "CoderSession::Init start";
  Model *model = lite::Model::Import(static_cast<const char *>(content), size);
  MS_CHECK_PTR(model);  // presumably returns an error code on null — confirm macro semantics
  // NOTE(review): assumes CoderGraph takes ownership of `model` — confirm its destructor frees it.
  coder_graph_ = std::make_unique<CoderGraph>(model);
  context_ = std::make_unique<CoderContext>();
  enableFp16_ = enableFp16;
  MS_LOG(INFO) << "CoderSession::Init done";
  return RET_OK;
}
3341@@ -299,7 +300,7 @@ int CoderSession::CreateOpCoders() {
3342 }
3343
int CoderSession::InitCodeGraph() {
  // Converts the imported model's tensors into coder tensors (fp16-aware when
  // enableFp16_ is set), then populates the graph's input/output tensor lists.
  MS_CHECK_RET_CODE(coder_graph_->ConvertTensors(enableFp16_), "convert tensors failed");
  MS_CHECK_RET_CODE(coder_graph_->InitGraphInOutTensors(), "init graph inputs and outputs failed");
  return RET_OK;
}
3350diff --git a/mindspore/lite/tools/converter/micro/coder/session.h b/mindspore/lite/tools/converter/micro/coder/session.h
3351index f1039af0..3a8f7290 100644
3352--- a/mindspore/lite/tools/converter/micro/coder/session.h
3353+++ b/mindspore/lite/tools/converter/micro/coder/session.h
3354@@ -34,7 +34,7 @@ class CoderSession {
3355
3356   ~CoderSession();
3357
3358-  int Init(const void *content, int size);
3359+  int Init(const void *content, int size, bool enableFp16);
3360
3361   int Build();
3362
3363@@ -57,6 +57,7 @@ class CoderSession {
3364   MemoryAllocator *allocator_{nullptr};
3365   std::vector<std::unique_ptr<OperatorCoder>> op_coders_;
3366   int schema_version_ = SCHEMA_VERSION::SCHEMA_CUR;
3367+  bool enableFp16_{false};
3368 };
3369
3370 std::shared_ptr<CoderSession> CreateCoderSession();
3371--
33722.17.1
3373
3374