1 /**
2 * Copyright 2023 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "nnacl/kernel/biasadd.h"
18 #include "nnacl/tensor_c.h"
19 #include "nnacl/op_base.h"
20 #include "nnacl/fp32/bias_add.h"
21 #include "nnacl/kernel/default_kernel_base.h"
22
23 #define BIAS_ADD_PER_UNIT_LOAD_NUM 2
24 #define BIAS_ADD_PER_UNIT_STORE_NUM 1
25 #define SPLIT_POINTS_SIZE 32
26
/* Kernel-private state for the fp32 BiasAdd kernel.
 * The flattened output of total_num_ elements is partitioned into at most
 * SPLIT_POINTS_SIZE contiguous ranges, one per worker thread. */
typedef struct BiasAddStruct {
  KernelBase base_;    /* common kernel interface; must stay the first member so the struct can be cast to KernelBase* */
  int64_t inner_num_;  /* elements covered by one bias vector (product of the bias tensor's dims) */
  int64_t outer_num_;  /* leading (batch) element count the bias is broadcast over */
  int64_t total_num_;  /* inner_num_ * outer_num_ == total element count to process */
  bool batch_priority_; /* chosen cutting strategy flag passed through to BiasAddOpt */
  int64_t split_points_[SPLIT_POINTS_SIZE]; /* per-task start offsets into the flattened data */
  int split_pionts_size_; /* number of valid entries in split_points_ (NOTE: "pionts" typo kept; used consistently in this file) */
} BiasAddStruct;
36
/* Decide how many threads to use and precompute the start offset of each
 * thread's slice of the flattened data.
 * Fills split_points_/split_pionts_size_, updates self->thread_nr_ to the
 * number of slices actually produced, and picks batch_priority_ for BiasAddOpt.
 * Returns NNACL_OK on success. */
int ChooseBiasThreadCuttingStrategy(KernelBase *self) {
  BiasAddStruct *bias_add = (BiasAddStruct *)self;
  NNACL_CHECK_NULL_RETURN_ERR(bias_add);
  self->thread_nr_ = self->UpdateThread(TC_PTYPE(PrimType_BiasAdd), BIAS_ADD_PER_UNIT_LOAD_NUM,
                                        BIAS_ADD_PER_UNIT_STORE_NUM, bias_add->total_num_, self->thread_nr_);
  /* Clamp to [1, SPLIT_POINTS_SIZE]: the upper bound keeps split_points_ in
   * range, the lower bound guards the division below against thread_nr_ == 0. */
  if (self->thread_nr_ < 1) {
    self->thread_nr_ = 1;
  }
  if (self->thread_nr_ > SPLIT_POINTS_SIZE) {
    self->thread_nr_ = SPLIT_POINTS_SIZE;
  }

  bias_add->split_pionts_size_ = 0;
  int64_t block_size = bias_add->total_num_ / self->thread_nr_;
  int64_t remain_data = bias_add->total_num_ - block_size * self->thread_nr_;
  /* The first remain_data slices get one extra element so the whole range is covered. */
  int64_t split_point = 0;
  while (split_point < bias_add->total_num_) {
    bias_add->split_points_[bias_add->split_pionts_size_++] = split_point;
    split_point += block_size;
    if (remain_data > 0) {
      ++split_point;
      --remain_data;
    }
  }
  self->thread_nr_ = bias_add->split_pionts_size_;
  /* Prefer batch-major traversal only for wide bias vectors with several
   * batches per slice (inner_num_ >= 64 guards the division). */
  if (bias_add->inner_num_ >= C64NUM && block_size / bias_add->inner_num_ >= C6NUM) {
    bias_add->batch_priority_ = true;
  } else {
    bias_add->batch_priority_ = false;
  }
  return NNACL_OK;
}
67
BiasRun(void * cdata,int task_id,float l,float r)68 int BiasRun(void *cdata, int task_id, float l, float r) {
69 BiasAddStruct *bias_add = (BiasAddStruct *)cdata;
70 NNACL_CHECK_NULL_RETURN_ERR(bias_add);
71
72 float *input = (float *)(bias_add->base_.in_[FIRST_INPUT]->data_);
73 NNACL_CHECK_NULL_RETURN_ERR(input);
74 float *bias = (float *)(bias_add->base_.in_[SECOND_INPUT]->data_);
75 NNACL_CHECK_NULL_RETURN_ERR(bias);
76 float *output = (float *)(bias_add->base_.out_[FIRST_INPUT]->data_);
77 NNACL_CHECK_NULL_RETURN_ERR(output);
78
79 int64_t block_start = bias_add->split_points_[task_id];
80 int64_t block_end = bias_add->total_num_;
81 if ((task_id + 1) < bias_add->split_pionts_size_) {
82 block_end = bias_add->split_points_[task_id + 1];
83 }
84 BiasAddOpt(input, bias, output, block_start, block_end, bias_add->inner_num_, bias_add->batch_priority_);
85 return NNACL_OK;
86 }
87
BiasAddResize(struct KernelBase * self)88 int BiasAddResize(struct KernelBase *self) {
89 BiasAddStruct *bias_add = (BiasAddStruct *)self;
90 NNACL_CHECK_NULL_RETURN_ERR(bias_add);
91
92 TensorC *in_tensor = self->in_[FIRST_INPUT];
93 TensorC *add_tensor = self->in_[SECOND_INPUT];
94 NNACL_CHECK_FALSE(in_tensor->shape_size_ == 0, NNACL_ERR);
95 NNACL_CHECK_FALSE(add_tensor->shape_size_ == 0, NNACL_ERR);
96 NNACL_CHECK_FALSE(in_tensor->shape_size_ < add_tensor->shape_size_, NNACL_ERR);
97
98 size_t dim_offset = in_tensor->shape_size_ - add_tensor->shape_size_;
99 bias_add->inner_num_ = 1;
100 for (size_t i = 0; i < add_tensor->shape_size_; ++i) {
101 NNACL_CHECK_FALSE(in_tensor->shape_[i + dim_offset] != add_tensor->shape_[i], NNACL_BIAS_ADD_SHAPE_NOT_MATCH);
102 NNACL_CHECK_FALSE(INT_MUL_OVERFLOW(in_tensor->shape_[i], bias_add->inner_num_), NNACL_BIAS_ADD_SHAPE_OVERFLOW);
103 bias_add->inner_num_ *= add_tensor->shape_[i];
104 }
105
106 bias_add->outer_num_ = 1;
107 for (size_t i = 0; i < dim_offset; ++i) {
108 NNACL_CHECK_FALSE(INT_MUL_OVERFLOW(in_tensor->shape_[i], bias_add->outer_num_), NNACL_BIAS_ADD_SHAPE_OVERFLOW);
109 bias_add->outer_num_ *= in_tensor->shape_[i];
110 }
111
112 NNACL_CHECK_FALSE(INT_MUL_OVERFLOW(bias_add->inner_num_, bias_add->outer_num_), NNACL_BIAS_ADD_SHAPE_OVERFLOW);
113 bias_add->total_num_ = bias_add->inner_num_ * bias_add->outer_num_;
114 return ChooseBiasThreadCuttingStrategy(self);
115 }
116
/* Run the kernel: fan BiasRun out over the thread pool, one task per split
 * point precomputed by ChooseBiasThreadCuttingStrategy. Returns the thread
 * pool's status code. */
int BiasAddCompute(struct KernelBase *self) {
  return self->env_->ParallelLaunch(self->env_->thread_pool_, BiasRun, self, self->thread_nr_);
}
120
CreateBiasAdd(OpParameter * param,int data_type)121 KernelBase *CreateBiasAdd(OpParameter *param, int data_type) {
122 BiasAddStruct *bias_add = (BiasAddStruct *)malloc(sizeof(BiasAddStruct));
123 NNACL_MALLOC_CHECK_NULL_RETURN_NULL(bias_add);
124 bias_add->base_.Prepare = DefaultPrepare2In1Out;
125 bias_add->base_.Resize = BiasAddResize;
126 bias_add->base_.Release = DefaultRelease;
127 bias_add->base_.Compute = BiasAddCompute;
128 return (KernelBase *)bias_add;
129 }
130
131 REG_KERNEL_CREATOR(PrimType_BiasAdd, kNumberTypeFloat32, CreateBiasAdd)
132