1 /**
2 * Copyright 2020 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "src/runtime/kernel/arm/fp16/scale_fp16.h"
18 #include <cstring>
19 #include <vector>
20 #include "schema/model_generated.h"
21 #include "src/kernel_registry.h"
22 #include "include/errorcode.h"
23 #include "src/runtime/kernel/arm/fp16/common_fp16.h"
24 #include "nnacl/fp16/scale_fp16.h"
25 #include "nnacl/fp16/cast_fp16.h"
26
27 using mindspore::lite::KernelRegistrar;
28 using mindspore::lite::RET_ERROR;
29 using mindspore::lite::RET_OK;
30 using mindspore::schema::PrimitiveType_ScaleFusion;
31
32 namespace mindspore::kernel {
InitScaleOffset()33 int ScaleFp16CPUKernel::InitScaleOffset() {
34 auto scale_tensor = in_tensors_.at(1);
35 malloc_scale_ = scale_tensor->data_type() == kNumberTypeFloat32;
36
37 if (in_tensors_.size() == 2) {
38 malloc_offset_ = true;
39 } else {
40 auto offset_tensor = in_tensors_.at(2);
41 malloc_offset_ = offset_tensor->data_type() == kNumberTypeFloat32;
42 }
43 return RET_OK;
44 }
45
Init()46 int ScaleFp16CPUKernel::Init() {
47 if (in_tensors_.size() < 2 || in_tensors_.size() > 3) {
48 MS_LOG(ERROR) << "inputs to Scale operator should be 2 or 3, but " << in_tensors_.size() << " is given.";
49 return RET_ERROR;
50 }
51 CHECK_LESS_RETURN(out_tensors_.size(), 1);
52
53 if (!InferShapeDone()) {
54 return RET_OK;
55 }
56 auto ret = ReSize();
57 if (ret != RET_OK) {
58 MS_LOG(ERROR) << "Scale fp16 Resize failed";
59 return RET_ERROR;
60 }
61 return RET_OK;
62 }
63
ReSize()64 int ScaleFp16CPUKernel::ReSize() {
65 auto ret = CalculateParameter();
66 if (ret != RET_OK) {
67 MS_LOG(ERROR) << "Scale fp16 CalculateParameter failed.";
68 return RET_ERROR;
69 }
70
71 return RET_OK;
72 }
73
Scale(int task_id)74 int ScaleFp16CPUKernel::Scale(int task_id) {
75 switch (scale_param_->activation_type_) {
76 case schema::ActivationType_RELU6:
77 DoScaleRelu6Fp16(input_, output_, scale_, offset_, task_id, scale_param_);
78 break;
79 case schema::ActivationType_RELU:
80 Fp16DoScaleRelu(input_, output_, scale_, offset_, task_id, scale_param_);
81 break;
82 case schema::ActivationType_NO_ACTIVATION:
83 DoScaleFp16(input_, output_, scale_, offset_, task_id, scale_param_);
84 break;
85 default:
86 MS_LOG(ERROR) << "ScaleFp16 does not support activation type " << scale_param_->activation_type_;
87 return RET_ERROR;
88 }
89 return RET_OK;
90 }
91
ScaleFp16Run(void * cdata,int task_id,float lhs_scale,float rhs_scale)92 int ScaleFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
93 auto scale = reinterpret_cast<ScaleFp16CPUKernel *>(cdata);
94 auto ret = scale->Scale(task_id);
95 if (ret != RET_OK) {
96 MS_LOG(ERROR) << "ScaleRun error task_id[" << task_id << "] error_code[" << ret << "]";
97 return RET_ERROR;
98 }
99 return RET_OK;
100 }
101
Run()102 int ScaleFp16CPUKernel::Run() {
103 auto input_tensor = in_tensors_.at(0);
104 auto output_tensor = out_tensors_.at(0);
105 CHECK_NULL_RETURN(input_tensor);
106 CHECK_NULL_RETURN(output_tensor);
107 input_ = reinterpret_cast<float16_t *>(input_tensor->data());
108 output_ = reinterpret_cast<float16_t *>(output_tensor->data());
109 CHECK_NULL_RETURN(input_);
110 CHECK_NULL_RETURN(output_);
111 auto ret = InitScaleOffset();
112 if (ret != RET_OK) {
113 MS_LOG(ERROR) << "Scale fp16 InitScaleOffset failed.";
114 return RET_ERROR;
115 }
116
117 ret = MallocAssignTmpBuffer();
118 if (ret != RET_OK) {
119 MS_LOG(ERROR) << "Scale Fp16 malloc tmp buffer failed";
120 FreeTmpBuffer();
121 return ret;
122 }
123
124 ret = ParallelLaunch(this->ms_context_, ScaleFp16Run, this, op_parameter_->thread_num_);
125 if (ret != RET_OK) {
126 MS_LOG(ERROR) << "Scale error error_code[" << ret << "]";
127 FreeTmpBuffer();
128 return RET_ERROR;
129 }
130
131 FreeTmpBuffer();
132 return RET_OK;
133 }
134
MallocAssignTmpBuffer()135 int ScaleFp16CPUKernel::MallocAssignTmpBuffer() {
136 scale_ = ConvertInputFp32toFp16(in_tensors_.at(1), static_cast<const lite::InnerContext *>(this->ms_context_));
137 if (scale_ == nullptr) {
138 return RET_ERROR;
139 }
140 if (in_tensors_.size() == 3) {
141 offset_ = ConvertInputFp32toFp16(in_tensors_.at(2), static_cast<const lite::InnerContext *>(this->ms_context_));
142 if (offset_ == nullptr) {
143 return RET_ERROR;
144 }
145 } else {
146 MS_CHECK_INT_MUL_NOT_OVERFLOW(in_tensors_.at(1)->ElementsNum(), sizeof(float16_t), RET_ERROR);
147 offset_ = reinterpret_cast<float16_t *>(
148 ms_context_->allocator->Malloc(in_tensors_.at(1)->ElementsNum() * sizeof(float16_t)));
149 if (offset_ == nullptr) {
150 MS_LOG(ERROR) << "Malloc data failed";
151 return RET_ERROR;
152 }
153 memset(offset_, 0, in_tensors_.at(1)->ElementsNum() * sizeof(float16_t));
154 }
155 return RET_OK;
156 }
157
FreeTmpBuffer()158 void ScaleFp16CPUKernel::FreeTmpBuffer() {
159 if (malloc_scale_ && scale_ != nullptr) {
160 ms_context_->allocator->Free(scale_);
161 scale_ = nullptr;
162 }
163 if (malloc_offset_ && offset_ != nullptr) {
164 ms_context_->allocator->Free(offset_);
165 offset_ = nullptr;
166 }
167 }
168
// Kernel registration: the macro arguments name the kCPU target, the fp16 data type,
// the ScaleFusion primitive and LiteKernelCreator<ScaleFp16CPUKernel> as the creator.
169 REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_ScaleFusion, LiteKernelCreator<ScaleFp16CPUKernel>)
170 } // namespace mindspore::kernel
171