/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "src/runtime/kernel/arm/fp16/layer_norm_fp16.h"
#include <vector>
#include "schema/model_generated.h"
#include "src/kernel_registry.h"
#include "include/errorcode.h"
#include "nnacl/fp16/layer_norm_fp16.h"

using mindspore::kernel::KERNEL_ARCH;
using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_LayerNormFusion;

namespace mindspore::kernel {
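// Init() validates the node: LayerNormFusion expects at least three inputs (data, gamma, beta)
// and at least one output. Shape-dependent setup is deferred to ReSize() until shape inference
// has completed.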
int LayerNormFp16CPUKernel::Init() {
  CHECK_LESS_RETURN(in_tensors_.size(), 3);
  CHECK_LESS_RETURN(out_tensors_.size(), 1);
  CHECK_NULL_RETURN(param_);
  if (!InferShapeDone()) {
    return RET_OK;
  }
  return ReSize();
}

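// ReSize() wraps negative axes to positive indices and splits the input shape into outer/inner
// products around begin_norm_axis_ (normalization) and begin_params_axis_ (gamma/beta broadcast).
// Illustrative example (hypothetical values, not taken from a real model):
//   input shape {2, 3, 4, 5}, begin_norm_axis_ = 2, begin_params_axis_ = 3
//     norm_outer_size_   = 2 * 3     = 6   -> independent normalization groups
//     norm_inner_size_   = 4 * 5     = 20  -> elements reduced per group
//     params_outer_size_ = 2 * 3 * 4 = 24
//     params_inner_size_ = 5               -> length of gamma/beta applied per group
// The thread count is then capped by norm_outer_size_ so no worker is left without a group.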
int LayerNormFp16CPUKernel::ReSize() {
  auto input = in_tensors_.front();
  CHECK_NULL_RETURN(input);
  auto shape = input->shape();
  param_->begin_norm_axis_ = param_->begin_norm_axis_ > 0
                               ? param_->begin_norm_axis_
                               : param_->begin_norm_axis_ + static_cast<int>(shape.size());
  param_->begin_params_axis_ = param_->begin_params_axis_ > 0
                                 ? param_->begin_params_axis_
                                 : param_->begin_params_axis_ + static_cast<int>(shape.size());

  param_->norm_outer_size_ = 1;
  for (int i = 0; i < param_->begin_norm_axis_; ++i) {
    param_->norm_outer_size_ *= shape.at(i);
  }
  param_->norm_inner_size_ = 1;
  for (size_t i = param_->begin_norm_axis_; i < shape.size(); ++i) {
    param_->norm_inner_size_ *= shape.at(i);
  }
  param_->params_outer_size_ = 1;
  for (int i = 0; i < param_->begin_params_axis_; ++i) {
    param_->params_outer_size_ *= shape.at(i);
  }
  param_->params_inner_size_ = 1;
  for (size_t i = param_->begin_params_axis_; i < shape.size(); ++i) {
    param_->params_inner_size_ *= shape.at(i);
  }
  op_parameter_->thread_num_ = MSMIN(param_->norm_outer_size_, op_parameter_->thread_num_);
  return RET_OK;
}

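// Per-thread worker: runs the nnacl fp16 LayerNorm routine for one task. The nnacl kernel is
// expected to use thread_id together with op_parameter_->thread_num_ to pick its share of the
// norm_outer_size_ groups (the split itself happens inside LayerNormFp16, not here).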
int LayerNormFp16CPUKernel::DoLayerNormFp16(int thread_id) {
  auto ret = LayerNormFp16(src_data_, gamma_data_, beta_data_, dst_data_, mean_data_, var_data_, param_, thread_id);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "DoLayerNormFp16 failed, error_code[" << ret << "]";
    return ret;
  }
  return RET_OK;
}

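// ParallelLaunch callback: forwards each task to the kernel instance carried in cdata.
// The lhs_scale/rhs_scale arguments belong to the common runner signature and are unused here.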
int LayerNormFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
  auto kernel = reinterpret_cast<LayerNormFp16CPUKernel *>(cdata);
  CHECK_NULL_RETURN(kernel);
  auto ret = kernel->DoLayerNormFp16(task_id);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "LayerNormFp16Run error task_id[" << task_id << "] error_code[" << ret << "]";
    return RET_ERROR;
  }
  return RET_OK;
}

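// Run() binds the tensor buffers and launches the parallel workers.
// Inputs:  0 = data, 1 = gamma, 2 = beta (all float16).
// Outputs: 0 = normalized data; with three outputs, 1 = mean and 2 = variance are also written.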
int LayerNormFp16CPUKernel::Run() {
  src_data_ = reinterpret_cast<float16_t *>(in_tensors_.at(0)->data());
  CHECK_NULL_RETURN(src_data_);
  gamma_data_ = reinterpret_cast<float16_t *>(in_tensors_.at(1)->data());
  CHECK_NULL_RETURN(gamma_data_);
  beta_data_ = reinterpret_cast<float16_t *>(in_tensors_.at(2)->data());
  CHECK_NULL_RETURN(beta_data_);
  dst_data_ = reinterpret_cast<float16_t *>(out_tensors_.at(0)->data());
  CHECK_NULL_RETURN(dst_data_);

  if (out_tensors_.size() == 3) {
    mean_data_ = reinterpret_cast<float16_t *>(out_tensors_.at(1)->data());
    CHECK_NULL_RETURN(mean_data_);
    var_data_ = reinterpret_cast<float16_t *>(out_tensors_.at(2)->data());
    CHECK_NULL_RETURN(var_data_);
  } else if (out_tensors_.size() != 1) {
    MS_LOG(ERROR) << "LayerNorm should have 1 or 3 output tensors";
    return RET_ERROR;
  }
  auto ret = ParallelLaunch(this->ms_context_, LayerNormFp16Run, this, op_parameter_->thread_num_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "LayerNormFp16Run failed, error_code[" << ret << "]";
  }
  return ret;
}

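// Register this kernel so the lite runtime can create it for float16 LayerNormFusion nodes on CPU.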
REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_LayerNormFusion, LiteKernelCreator<LayerNormFp16CPUKernel>)
}  // namespace mindspore::kernel