/**
 * Copyright 2023 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "nnacl/kernel/scale.h"
#include "nnacl/common_func.h"
#include "nnacl/scale_parameter.h"
#include "nnacl/fp32/scale_fp32.h"
#include "nnacl/tensor_c_utils.h"
#ifdef ENABLE_FP16
#include "nnacl/fp16/utils_fp16.h"
#include "nnacl/fp16/scale_fp16.h"
#endif

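/* Per-task fp16 worker: runs the fused scale transform (out = in * scale + offset, per the
 * DoScale*Fp16 kernels) on this task's slice, with an optional Relu/Relu6 activation.
 * Any other activation type is rejected; without ENABLE_FP16 this path is compiled out
 * and reports NNACL_DISABLE_FP16. */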
int ScaleRunF16(ScaleStruct *scale, int task_id, ActType act_type) {
#ifdef ENABLE_FP16
  switch (act_type) {
    case ActType_Relu6:
      DoScaleRelu6Fp16((const float16_t *)scale->input_, (float16_t *)scale->output_, (const float16_t *)scale->scale_,
                       (const float16_t *)scale->offset_, task_id, scale);
      break;
    case ActType_Relu:
      Fp16DoScaleRelu((const float16_t *)scale->input_, (float16_t *)scale->output_, (const float16_t *)scale->scale_,
                      (const float16_t *)scale->offset_, task_id, scale);
      break;
    case ActType_No:
      DoScaleFp16((const float16_t *)scale->input_, (float16_t *)scale->output_, (const float16_t *)scale->scale_,
                  (const float16_t *)scale->offset_, task_id, scale);
      break;
    default:
      return NNACL_SCALE_UNSUPPORT_ACT_TYPE;
  }
  return NNACL_OK;
#endif
  return NNACL_DISABLE_FP16;
}

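/* Fp16-only setup done at compute time: make sure scale_ and offset_ point at float16 data
 * before the workers run. When a constant tensor is not already fp16, GetOrAllocFp16Data is
 * expected to allocate and fill an fp16 copy (tracked via malloc_scale_/malloc_offset_ so
 * ScaleRelease can free it); otherwise the pointers are resolved from the raw tensor data
 * later in ScaleCompute. */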
int ScaleInitInputDataType(ScaleStruct *scale) {
  if (scale->data_type_ == kNumberTypeFloat32) {
    return NNACL_OK;
  }

#ifdef ENABLE_FP16
  TensorC *scale_tensor = scale->base_.in_[SECOND_INPUT];
  NNACL_CHECK_NULL_RETURN_ERR(scale_tensor);
  if (scale_tensor->data_type_ != kNumberTypeFloat16 && scale->malloc_scale_ == false) {
    scale->malloc_scale_ = true;
    scale->scale_ = GetOrAllocFp16Data(scale_tensor, scale->base_.env_, true);
  } else {
    scale->malloc_scale_ = false;
    scale->scale_ = NULL;
  }

  if (scale->base_.in_size_ == TWO_TENSOR) {
    /* already done in prepare */
    return NNACL_OK;
  }

  TensorC *offset_tensor = scale->base_.in_[THIRD_INPUT];
  NNACL_CHECK_NULL_RETURN_ERR(offset_tensor);
  if (offset_tensor->data_type_ != kNumberTypeFloat16 && scale->malloc_offset_ == false) {
    scale->malloc_offset_ = true;
    scale->offset_ = GetOrAllocFp16Data(offset_tensor, scale->base_.env_, true);
  } else {
    scale->malloc_offset_ = false;
    scale->offset_ = NULL;
  }
  return NNACL_OK;
#endif
  return NNACL_DISABLE_FP16;
}

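/* Per-task fp32 worker: same dispatch as the fp16 path, selecting the plain, Relu or Relu6
 * variant of the fp32 scale kernel for this task's slice. */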
int ScaleRunF32(ScaleStruct *scale, int task_id, ActType act_type) {
  switch (act_type) {
    case ActType_Relu6:
      DoScaleRelu6((const float *)scale->input_, (float *)scale->output_, (const float *)scale->scale_,
                   (const float *)scale->offset_, task_id, scale);
      break;
    case ActType_Relu:
      DoScaleRelu((const float *)scale->input_, (float *)scale->output_, (const float *)scale->scale_,
                  (const float *)scale->offset_, task_id, scale);
      break;
    case ActType_No:
      DoScale((const float *)scale->input_, (float *)scale->output_, (const float *)scale->scale_,
              (const float *)scale->offset_, task_id, scale);
      break;
    default:
      return NNACL_SCALE_UNSUPPORT_ACT_TYPE;
  }
  return NNACL_OK;
}

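/* Parallel task entry handed to ParallelLaunch. cdata is the kernel itself (CreateScale casts
 * the ScaleStruct directly to KernelBase, so the reverse cast is safe) and the call is
 * dispatched on the kernel's data type. */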
int ScaleRun(void *cdata, int task_id, float l, float r) {
  ScaleStruct *scale = (ScaleStruct *)cdata;
  NNACL_CHECK_NULL_RETURN_ERR(scale);
  ActType act_type = ((ScaleParameter *)scale->base_.param_)->activation_type_;
  if (scale->data_type_ == kNumberTypeFloat16) {
    return ScaleRunF16(scale, task_id, act_type);
  } else if (scale->data_type_ == kNumberTypeFloat32) {
    return ScaleRunF32(scale, task_id, act_type);
  }
  return NNACL_UNSUPPORTED_DATA_TYPE;
}

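/* Split the input into outer / axis / inner blocks around the scaled axes. For example, an
 * input of shape [2, 3, 4, 5] with a scale of shape [3, 4] and axis 1 yields outer_size_ = 2,
 * axis_size_ = 12 and inner_size_ = 5. The thread count is then capped by outer_size_ so
 * every task owns at least one outer block. */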
int ScaleCalculateParameter(ScaleStruct *scale) {
  TensorC *input_tensor = scale->base_.in_[FIRST_INPUT];
  NNACL_CHECK_NULL_RETURN_ERR(input_tensor);
  TensorC *scale_tensor = scale->base_.in_[SECOND_INPUT];
  NNACL_CHECK_NULL_RETURN_ERR(scale_tensor);
  TensorC *output_tensor = scale->base_.out_[OUTPUT_INDEX];
  NNACL_CHECK_NULL_RETURN_ERR(output_tensor);

  scale->outer_size_ = 1;
  scale->axis_size_ = 1;
  scale->inner_size_ = 1;
  for (int i = 0; i < scale->axis_; i++) {
    scale->outer_size_ *= input_tensor->shape_[i];
  }
  for (size_t i = 0; i < scale_tensor->shape_size_; i++) {
    scale->axis_size_ *= input_tensor->shape_[i + scale->axis_];
  }
  for (size_t i = scale->axis_ + scale_tensor->shape_size_; i < input_tensor->shape_size_; i++) {
    scale->inner_size_ *= input_tensor->shape_[i];
  }

  scale->base_.thread_nr_ = MSMIN(scale->base_.thread_nr_, scale->outer_size_);
  NNACL_CHECK_ZERO_RETURN_ERR(scale->base_.thread_nr_);

  return NNACL_OK;
}

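/* Called from Prepare: stage constant scale/offset data in kernel-owned buffers. With only
 * two inputs the offset is allocated here and zero-filled. For fp16 kernels the scale/offset
 * conversion is deferred to ScaleInitInputDataType at compute time. */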
int ScaleInitScaleOffset(ScaleStruct *scale) {
  TensorC *scale_tensor = scale->base_.in_[SECOND_INPUT];
  NNACL_CHECK_NULL_RETURN_ERR(scale_tensor);
  int data_type_size = DataTypeCSize(scale->data_type_);

  if (scale->base_.in_size_ == TWO_TENSOR) {
    scale->malloc_offset_ = true;
    int malloc_size = GetElementNum(scale_tensor) * data_type_size;
    NNACL_CHECK_MALLOC_SIZE(malloc_size);
    scale->offset_ = scale->base_.env_->Alloc(scale->base_.env_->allocator_, malloc_size);
    NNACL_MALLOC_CHECK_NULL_RETURN_ERR(scale->offset_);
    memset(scale->offset_, 0, malloc_size);
  }

  if (scale->data_type_ == kNumberTypeFloat16) {
    /* handle fp16 scale and offset in compute */
    return NNACL_OK;
  }

  if (scale_tensor->data_ != NULL) {
    scale->malloc_scale_ = true;
    int malloc_size = GetElementNum(scale_tensor) * data_type_size;
    NNACL_CHECK_MALLOC_SIZE(malloc_size);
    scale->scale_ = scale->base_.env_->Alloc(scale->base_.env_->allocator_, malloc_size);
    NNACL_MALLOC_CHECK_NULL_RETURN_ERR(scale->scale_);
    (void)memcpy(scale->scale_, scale_tensor->data_, malloc_size);
  } else {
    scale->malloc_scale_ = false;
    scale->scale_ = NULL;
  }

  if (scale->base_.in_size_ == TWO_TENSOR) {
    return NNACL_OK;
  }
  NNACL_CHECK_FALSE(scale->base_.in_size_ != THREE_TENSOR, NNACL_SCALE_INPUT_NUM_INVALID);

  TensorC *offset_tensor = scale->base_.in_[THIRD_INPUT];
  NNACL_CHECK_NULL_RETURN_ERR(offset_tensor);
  if (offset_tensor->data_ != NULL) {
    scale->malloc_offset_ = true;
    int malloc_size = GetElementNum(offset_tensor) * data_type_size;
    NNACL_CHECK_MALLOC_SIZE(malloc_size);
    scale->offset_ = scale->base_.env_->Alloc(scale->base_.env_->allocator_, malloc_size);
    NNACL_MALLOC_CHECK_NULL_RETURN_ERR(scale->offset_);
    (void)memcpy(scale->offset_, offset_tensor->data_, malloc_size);
  } else {
    scale->malloc_offset_ = false;
    scale->offset_ = NULL;
  }

  return NNACL_OK;
}

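/* Basic validation: at least two inputs and one output, all of them fp32 or fp16 tensors. */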
int ScaleCheckInputsOutputs(KernelBase *self) {
  NNACL_CHECK_FALSE(self->in_size_ < TWO_TENSOR, NNACL_INPUT_TENSOR_ERROR);
  NNACL_CHECK_FALSE(self->out_size_ < ONE_TENSOR, NNACL_OUTPUT_TENSOR_ERROR);

  for (size_t i = 0; i < self->in_size_; i++) {
    TensorC *input_tensor = self->in_[i];
    NNACL_CHECK_NULL_RETURN_ERR(input_tensor);
    if (input_tensor->data_type_ != kNumberTypeFloat32 && input_tensor->data_type_ != kNumberTypeFloat16) {
      return NNACL_UNSUPPORTED_DATA_TYPE;
    }
  }

  TensorC *output_tensor = self->out_[OUTPUT_INDEX];
  NNACL_CHECK_NULL_RETURN_ERR(output_tensor);
  if (output_tensor->data_type_ != kNumberTypeFloat32 && output_tensor->data_type_ != kNumberTypeFloat16) {
    return NNACL_UNSUPPORTED_DATA_TYPE;
  }
  return NNACL_OK;
}

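/* Free only the buffers this kernel allocated itself (flagged by malloc_scale_ /
 * malloc_offset_); pointers borrowed from input tensors are left untouched. */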
int ScaleRelease(struct KernelBase *self) {
  ScaleStruct *scale = (ScaleStruct *)self;
  NNACL_CHECK_NULL_RETURN_ERR(scale);

  if (scale->malloc_scale_ && scale->scale_ != NULL) {
    self->env_->Free(self->env_->allocator_, scale->scale_);
    scale->scale_ = NULL;
    scale->malloc_scale_ = false;
  }

  if (scale->malloc_offset_ && scale->offset_ != NULL) {
    self->env_->Free(self->env_->allocator_, scale->offset_);
    scale->offset_ = NULL;
    scale->malloc_offset_ = false;
  }
  return NNACL_OK;
}

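/* Resolve the axis (negative values count from the back), verify that the scale tensor's
 * shape matches the input shape starting at that axis, then recompute the outer/axis/inner
 * block sizes. */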
int ScaleResize(struct KernelBase *self) {
  ScaleStruct *scale = (ScaleStruct *)self;
  NNACL_CHECK_NULL_RETURN_ERR(scale);

  TensorC *input_tensor = self->in_[FIRST_INPUT];
  NNACL_CHECK_NULL_RETURN_ERR(input_tensor);
  TensorC *scale_tensor = self->in_[SECOND_INPUT];
  NNACL_CHECK_NULL_RETURN_ERR(scale_tensor);

  int origin_axis = ((ScaleParameter *)self->param_)->axis_;
  scale->axis_ = origin_axis < 0 ? origin_axis + input_tensor->shape_size_ : origin_axis;

  for (size_t i = 0; i < scale_tensor->shape_size_; i++) {
    if (i + scale->axis_ >= input_tensor->shape_size_) {
      return NNACL_SCALE_AXIS_AND_SHAPE_UNMATCH;
    }
    if (input_tensor->shape_[i + scale->axis_] != scale_tensor->shape_[i]) {
      return NNACL_SCALE_SCALE_SHAPE_UNMATCH;
    }
  }

  int ret = ScaleCalculateParameter(scale);
  if (ret != NNACL_OK) {
    return ret;
  }
  return NNACL_OK;
}

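/* Bind the input/output pointers, resolve fp16 scale/offset buffers if needed, fall back to
 * the raw tensor data where no kernel-owned copy exists, and run ScaleRun across
 * thread_nr_ parallel tasks. */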
int ScaleCompute(struct KernelBase *self) {
  ScaleStruct *scale = (ScaleStruct *)self;
  NNACL_CHECK_NULL_RETURN_ERR(scale);

  TensorC *input_tensor = self->in_[FIRST_INPUT];
  NNACL_CHECK_NULL_RETURN_ERR(input_tensor);
  scale->input_ = input_tensor->data_;
  NNACL_CHECK_NULL_RETURN_ERR(scale->input_);

  TensorC *output_tensor = self->out_[OUTPUT_INDEX];
  NNACL_CHECK_NULL_RETURN_ERR(output_tensor);
  scale->output_ = output_tensor->data_;
  NNACL_CHECK_NULL_RETURN_ERR(scale->output_);

  int ret = ScaleInitInputDataType(scale);
  if (ret != NNACL_OK) {
    return ret;
  }

  if (!scale->malloc_scale_) {
    TensorC *scale_tensor = self->in_[SECOND_INPUT];
    NNACL_CHECK_NULL_RETURN_ERR(scale_tensor);
    scale->scale_ = scale_tensor->data_;
    NNACL_CHECK_NULL_RETURN_ERR(scale->scale_);
  }

  if (!scale->malloc_offset_) {
    TensorC *offset_tensor = self->in_[THIRD_INPUT];
    NNACL_CHECK_NULL_RETURN_ERR(offset_tensor);
    scale->offset_ = offset_tensor->data_;
    NNACL_CHECK_NULL_RETURN_ERR(scale->offset_);
  }

  return self->env_->ParallelLaunch(self->env_->thread_pool_, ScaleRun, self, self->thread_nr_);
}

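/* Prepare: validate the tensors and stage constant scale/offset data before Resize/Compute. */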
int ScalePrepare(struct KernelBase *self) {
  ScaleStruct *scale = (ScaleStruct *)self;
  NNACL_CHECK_NULL_RETURN_ERR(scale);

  int ret = ScaleCheckInputsOutputs(self);
  if (ret != NNACL_OK) {
    return ret;
  }

  ret = ScaleInitScaleOffset(scale);
  if (ret != NNACL_OK) {
    return ret;
  }

  return NNACL_OK;
}

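/* Kernel factory registered below for ScaleFusion with fp32 and fp16 data types. */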
KernelBase *CreateScale(OpParameter *param, int data_type) {
  ScaleStruct *scale = (ScaleStruct *)malloc(sizeof(ScaleStruct));
  NNACL_MALLOC_CHECK_NULL_RETURN_NULL(scale);
  memset(scale, 0, sizeof(ScaleStruct));
  scale->data_type_ = data_type;
  scale->scale_ = NULL;
  scale->offset_ = NULL;
  scale->malloc_scale_ = false;
  scale->malloc_offset_ = false;
  scale->base_.Prepare = ScalePrepare;
  scale->base_.Resize = ScaleResize;
  scale->base_.Compute = ScaleCompute;
  scale->base_.Release = ScaleRelease;
  return (KernelBase *)scale;
}

REG_KERNEL_CREATOR(PrimType_ScaleFusion, kNumberTypeFloat16, CreateScale)
REG_KERNEL_CREATOR(PrimType_ScaleFusion, kNumberTypeFloat32, CreateScale)