/**
 * Copyright 2023 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "nnacl/kernel/scale.h"
#include "nnacl/common_func.h"
#include "nnacl/scale_parameter.h"
#include "nnacl/fp32/scale_fp32.h"
#include "nnacl/tensor_c_utils.h"
#ifdef ENABLE_FP16
#include "nnacl/fp16/utils_fp16.h"
#include "nnacl/fp16/scale_fp16.h"
#endif

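/* Fp16 worker: dispatches one thread's slice of the scale computation to the
 * fp16 kernel matching the fused activation. Returns NNACL_DISABLE_FP16 when
 * the library is built without ENABLE_FP16. */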
int ScaleRunF16(ScaleStruct *scale, int task_id, ActType act_type) {
#ifdef ENABLE_FP16
  switch (act_type) {
    case ActType_Relu6:
      DoScaleRelu6Fp16((const float16_t *)scale->input_, (float16_t *)scale->output_, (const float16_t *)scale->scale_,
                       (const float16_t *)scale->offset_, task_id, scale);
      break;
    case ActType_Relu:
      Fp16DoScaleRelu((const float16_t *)scale->input_, (float16_t *)scale->output_, (const float16_t *)scale->scale_,
                      (const float16_t *)scale->offset_, task_id, scale);
      break;
    case ActType_No:
      DoScaleFp16((const float16_t *)scale->input_, (float16_t *)scale->output_, (const float16_t *)scale->scale_,
                  (const float16_t *)scale->offset_, task_id, scale);
      break;
    default:
      return NNACL_ERR;
  }
  return NNACL_OK;
#endif
  return NNACL_DISABLE_FP16;
}

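/* For fp16 execution, ensures scale_ and offset_ point at float16 data.
 * Tensors that arrive in another type are converted via GetOrAllocFp16Data
 * into buffers this kernel owns; the malloc_* flags record that ownership
 * so ScaleRelease can free them later. */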
int ScaleInitInputDataType(ScaleStruct *scale) {
  if (scale->data_type_ == kNumberTypeFloat32) {
    return NNACL_OK;
  }

#ifdef ENABLE_FP16
  TensorC *scale_tensor = scale->base_.in_[SECOND_INPUT];
  NNACL_CHECK_NULL_RETURN_ERR(scale_tensor);
  if (scale_tensor->data_type_ != kNumberTypeFloat16 && scale->malloc_scale_ == false) {
    scale->malloc_scale_ = true;
    scale->scale_ = GetOrAllocFp16Data(scale_tensor, scale->base_.env_, true);
  } else {
    scale->malloc_scale_ = false;
    scale->scale_ = NULL;
  }

  if (scale->base_.in_size_ == TWO_TENSOR) {
    /* already done in prepare */
    return NNACL_OK;
  }

  TensorC *offset_tensor = scale->base_.in_[THIRD_INPUT];
  NNACL_CHECK_NULL_RETURN_ERR(offset_tensor);
  if (offset_tensor->data_type_ != kNumberTypeFloat16 && scale->malloc_offset_ == false) {
    scale->malloc_offset_ = true;
    scale->offset_ = GetOrAllocFp16Data(offset_tensor, scale->base_.env_, true);
  } else {
    scale->malloc_offset_ = false;
    scale->offset_ = NULL;
  }
  return NNACL_OK;
#endif
  return NNACL_DISABLE_FP16;
}

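/* Fp32 worker: same dispatch as the fp16 path, selecting the plain, ReLU-fused,
 * or ReLU6-fused scale kernel for one thread's slice. */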
int ScaleRunF32(ScaleStruct *scale, int task_id, ActType act_type) {
  switch (act_type) {
    case ActType_Relu6:
      DoScaleRelu6((const float *)scale->input_, (float *)scale->output_, (const float *)scale->scale_,
                   (const float *)scale->offset_, task_id, scale);
      break;
    case ActType_Relu:
      DoScaleRelu((const float *)scale->input_, (float *)scale->output_, (const float *)scale->scale_,
                  (const float *)scale->offset_, task_id, scale);
      break;
    case ActType_No:
      DoScale((const float *)scale->input_, (float *)scale->output_, (const float *)scale->scale_,
              (const float *)scale->offset_, task_id, scale);
      break;
    default:
      return NNACL_SCALE_UNSUPPORT_ACT_TYPE;
  }
  return NNACL_OK;
}

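/* Thread-pool entry point: casts cdata back to the kernel struct and routes
 * the task to the fp16 or fp32 worker by data type. The two float arguments
 * belong to the ParallelLaunch callback signature and are unused here. */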
int ScaleRun(void *cdata, int task_id, float l, float r) {
  ScaleStruct *scale = (ScaleStruct *)cdata;
  NNACL_CHECK_NULL_RETURN_ERR(scale);
  ActType act_type = ((ScaleParameter *)scale->base_.param_)->activation_type_;
  if (scale->data_type_ == kNumberTypeFloat16) {
    return ScaleRunF16(scale, task_id, act_type);
  } else if (scale->data_type_ == kNumberTypeFloat32) {
    return ScaleRunF32(scale, task_id, act_type);
  }
  return NNACL_UNSUPPORTED_DATA_TYPE;
}

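/* Splits the input shape into outer_size * axis_size * inner_size around the
 * (already normalized) axis: dims before the axis form the outer size, dims
 * covered by the scale tensor form the axis size, and the remainder forms the
 * inner size. thread_nr_ is then capped by outer_size_ so every thread has work. */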
int ScaleCalculateParameter(ScaleStruct *scale) {
  TensorC *input_tensor = scale->base_.in_[FIRST_INPUT];
  NNACL_CHECK_NULL_RETURN_ERR(input_tensor);
  TensorC *scale_tensor = scale->base_.in_[SECOND_INPUT];
  NNACL_CHECK_NULL_RETURN_ERR(scale_tensor);
  TensorC *output_tensor = scale->base_.out_[OUTPUT_INDEX];
  NNACL_CHECK_NULL_RETURN_ERR(output_tensor);

  scale->outer_size_ = 1;
  scale->axis_size_ = 1;
  scale->inner_size_ = 1;
  for (int i = 0; i < scale->axis_; i++) {
    scale->outer_size_ *= input_tensor->shape_[i];
  }
  for (size_t i = 0; i < scale_tensor->shape_size_; i++) {
    scale->axis_size_ *= input_tensor->shape_[i + scale->axis_];
  }
  for (size_t i = scale->axis_ + scale_tensor->shape_size_; i < input_tensor->shape_size_; i++) {
    scale->inner_size_ *= input_tensor->shape_[i];
  }

  scale->base_.thread_nr_ = MSMIN(scale->base_.thread_nr_, scale->outer_size_);
  NNACL_CHECK_ZERO_RETURN_ERR(scale->base_.thread_nr_);

  return NNACL_OK;
}

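/* Prepare-time setup of the scale/offset buffers. With only two inputs there
 * is no offset tensor, so a zero-filled offset buffer sized like the scale
 * tensor is allocated. Constant scale/offset data is copied into buffers the
 * kernel owns; for fp16 the conversion is deferred to compute time. */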
int ScaleInitScaleOffset(ScaleStruct *scale) {
  TensorC *scale_tensor = scale->base_.in_[SECOND_INPUT];
  NNACL_CHECK_NULL_RETURN_ERR(scale_tensor);
  int data_type_size = DataTypeCSize(scale->data_type_);

  if (scale->base_.in_size_ == TWO_TENSOR) {
    scale->malloc_offset_ = true;
    int malloc_size = GetElementNum(scale_tensor) * data_type_size;
    NNACL_CHECK_MALLOC_SIZE(malloc_size);
    scale->offset_ = scale->base_.env_->Alloc(scale->base_.env_->allocator_, malloc_size);
    NNACL_MALLOC_CHECK_NULL_RETURN_ERR(scale->offset_);
    memset(scale->offset_, 0, malloc_size);
  }

  if (scale->data_type_ == kNumberTypeFloat16) {
    /* handle fp16 scale and offset in compute */
    return NNACL_OK;
  }

  if (scale_tensor->data_ != NULL) {
    scale->malloc_scale_ = true;
    int malloc_size = GetElementNum(scale_tensor) * data_type_size;
    NNACL_CHECK_MALLOC_SIZE(malloc_size);
    scale->scale_ = scale->base_.env_->Alloc(scale->base_.env_->allocator_, malloc_size);
    NNACL_MALLOC_CHECK_NULL_RETURN_ERR(scale->scale_);
    (void)memcpy(scale->scale_, scale_tensor->data_, malloc_size);
  } else {
    scale->malloc_scale_ = false;
    scale->scale_ = NULL;
  }

  if (scale->base_.in_size_ == TWO_TENSOR) {
    return NNACL_OK;
  }
  NNACL_CHECK_FALSE(scale->base_.in_size_ != THREE_TENSOR, NNACL_SCALE_INPUT_NUM_INVALID);

  TensorC *offset_tensor = scale->base_.in_[THIRD_INPUT];
  NNACL_CHECK_NULL_RETURN_ERR(offset_tensor);
  if (offset_tensor->data_ != NULL) {
    scale->malloc_offset_ = true;
    int malloc_size = GetElementNum(offset_tensor) * data_type_size;
    NNACL_CHECK_MALLOC_SIZE(malloc_size);
    scale->offset_ = scale->base_.env_->Alloc(scale->base_.env_->allocator_, malloc_size);
    NNACL_MALLOC_CHECK_NULL_RETURN_ERR(scale->offset_);
    (void)memcpy(scale->offset_, offset_tensor->data_, malloc_size);
  } else {
    scale->malloc_offset_ = false;
    scale->offset_ = NULL;
  }

  return NNACL_OK;
}

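/* Validates tensor counts and rejects any input or output tensor whose data
 * type is neither fp32 nor fp16. */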
int ScaleCheckInputsOutputs(KernelBase *self) {
  NNACL_CHECK_FALSE(self->in_size_ < TWO_TENSOR, NNACL_INPUT_TENSOR_ERROR);
  NNACL_CHECK_FALSE(self->out_size_ < ONE_TENSOR, NNACL_INPUT_TENSOR_ERROR);

  for (size_t i = 0; i < self->in_size_; i++) {
    TensorC *input_tensor = self->in_[i];
    NNACL_CHECK_NULL_RETURN_ERR(input_tensor);
    if (input_tensor->data_type_ != kNumberTypeFloat32 && input_tensor->data_type_ != kNumberTypeFloat16) {
      return NNACL_UNSUPPORTED_DATA_TYPE;
    }
  }

  TensorC *output_tensor = self->out_[OUTPUT_INDEX];
  NNACL_CHECK_NULL_RETURN_ERR(output_tensor);
  if (output_tensor->data_type_ != kNumberTypeFloat32 && output_tensor->data_type_ != kNumberTypeFloat16) {
    return NNACL_UNSUPPORTED_DATA_TYPE;
  }
  return NNACL_OK;
}

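/* Frees the scale/offset buffers only when this kernel allocated them
 * (malloc_scale_/malloc_offset_ set), then resets the pointers and flags. */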
int ScaleRelease(struct KernelBase *self) {
  ScaleStruct *scale = (ScaleStruct *)self;
  NNACL_CHECK_NULL_RETURN_ERR(scale);

  if (scale->malloc_scale_ && scale->scale_ != NULL) {
    self->env_->Free(self->env_->allocator_, scale->scale_);
    scale->scale_ = NULL;
    scale->malloc_scale_ = false;
  }

  if (scale->malloc_offset_ && scale->offset_ != NULL) {
    self->env_->Free(self->env_->allocator_, scale->offset_);
    scale->offset_ = NULL;
    scale->malloc_offset_ = false;
  }
  return NNACL_OK;
}

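/* Normalizes a possibly negative axis, verifies the scale tensor's shape
 * matches the input shape starting at that axis, then recomputes the
 * outer/axis/inner split. */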
int ScaleResize(struct KernelBase *self) {
  ScaleStruct *scale = (ScaleStruct *)self;
  NNACL_CHECK_NULL_RETURN_ERR(scale);

  TensorC *input_tensor = self->in_[FIRST_INPUT];
  NNACL_CHECK_NULL_RETURN_ERR(input_tensor);
  TensorC *scale_tensor = self->in_[SECOND_INPUT];
  NNACL_CHECK_NULL_RETURN_ERR(scale_tensor);

  int origin_axis = ((ScaleParameter *)self->param_)->axis_;
  scale->axis_ = origin_axis < 0 ? origin_axis + input_tensor->shape_size_ : origin_axis;

  for (size_t i = 0; i < scale_tensor->shape_size_; i++) {
    if (i + scale->axis_ >= input_tensor->shape_size_) {
      return NNACL_SCALE_AXIS_AND_SHAPE_UNMATCH;
    }
    if (input_tensor->shape_[i + scale->axis_] != scale_tensor->shape_[i]) {
      return NNACL_SCALE_SCALE_SHAPE_UNMATCH;
    }
  }

  int ret = ScaleCalculateParameter(scale);
  if (ret != NNACL_OK) {
    return ret;
  }
  return NNACL_OK;
}

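/* Binds the input/output data pointers, resolves fp16 scale/offset buffers if
 * needed, falls back to the tensors' own data when nothing was allocated, and
 * launches ScaleRun across the thread pool. */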
int ScaleCompute(struct KernelBase *self) {
  ScaleStruct *scale = (ScaleStruct *)self;
  NNACL_CHECK_NULL_RETURN_ERR(scale);

  TensorC *input_tensor = self->in_[FIRST_INPUT];
  NNACL_CHECK_NULL_RETURN_ERR(input_tensor);
  scale->input_ = input_tensor->data_;
  NNACL_CHECK_NULL_RETURN_ERR(scale->input_);

  TensorC *output_tensor = self->out_[OUTPUT_INDEX];
  NNACL_CHECK_NULL_RETURN_ERR(output_tensor);
  scale->output_ = output_tensor->data_;
  NNACL_CHECK_NULL_RETURN_ERR(scale->output_);

  int ret = ScaleInitInputDataType(scale);
  if (ret != NNACL_OK) {
    return ret;
  }

  if (!scale->malloc_scale_) {
    TensorC *scale_tensor = self->in_[SECOND_INPUT];
    NNACL_CHECK_NULL_RETURN_ERR(scale_tensor);
    scale->scale_ = scale_tensor->data_;
    NNACL_CHECK_NULL_RETURN_ERR(scale->scale_);
  }

  if (!scale->malloc_offset_) {
    TensorC *offset_tensor = self->in_[THIRD_INPUT];
    NNACL_CHECK_NULL_RETURN_ERR(offset_tensor);
    scale->offset_ = offset_tensor->data_;
    NNACL_CHECK_NULL_RETURN_ERR(scale->offset_);
  }

  return self->env_->ParallelLaunch(self->env_->thread_pool_, ScaleRun, self, self->thread_nr_);
}

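/* One-time setup: validates inputs/outputs and materializes constant
 * scale/offset buffers. */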
int ScalePrepare(struct KernelBase *self) {
  ScaleStruct *scale = (ScaleStruct *)self;
  NNACL_CHECK_NULL_RETURN_ERR(scale);

  int ret = ScaleCheckInputsOutputs(self);
  if (ret != NNACL_OK) {
    return ret;
  }

  ret = ScaleInitScaleOffset(scale);
  if (ret != NNACL_OK) {
    return ret;
  }

  return NNACL_OK;
}

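/* Kernel factory registered for ScaleFusion: zero-initializes the struct,
 * records the data type, and wires up the KernelBase callbacks. */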
KernelBase *CreateScale(OpParameter *param, int data_type) {
  ScaleStruct *scale = (ScaleStruct *)malloc(sizeof(ScaleStruct));
  NNACL_MALLOC_CHECK_NULL_RETURN_NULL(scale);
  memset(scale, 0, sizeof(ScaleStruct));
  scale->data_type_ = data_type;
  scale->scale_ = NULL;
  scale->offset_ = NULL;
  scale->malloc_scale_ = false;
  scale->malloc_offset_ = false;
  scale->base_.Prepare = ScalePrepare;
  scale->base_.Resize = ScaleResize;
  scale->base_.Compute = ScaleCompute;
  scale->base_.Release = ScaleRelease;
  return (KernelBase *)scale;
}

REG_KERNEL_CREATOR(PrimType_ScaleFusion, kNumberTypeFloat16, CreateScale)
REG_KERNEL_CREATOR(PrimType_ScaleFusion, kNumberTypeFloat32, CreateScale)