/**
 * Copyright 2023 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
#include "nnacl/kernel/biasadd.h"
#include <string.h>
#include "nnacl/tensor_c.h"
#include "nnacl/op_base.h"
#include "nnacl/fp32/bias_add.h"
#include "nnacl/kernel/default_kernel_base.h"
22 
23 #define BIAS_ADD_PER_UNIT_LOAD_NUM 2
24 #define BIAS_ADD_PER_UNIT_STORE_NUM 1
25 #define SPLIT_POINTS_SIZE 32
26 
27 typedef struct BiasAddStruct {
28   KernelBase base_;
29   int64_t inner_num_;
30   int64_t outer_num_;
31   int64_t total_num_;
32   bool batch_priority_;
33   int64_t split_points_[SPLIT_POINTS_SIZE];
34   int split_pionts_size_;
35 } BiasAddStruct;
36 
ChooseBiasThreadCuttingStrategy(KernelBase * self)37 int ChooseBiasThreadCuttingStrategy(KernelBase *self) {
38   BiasAddStruct *bias_add = (BiasAddStruct *)self;
39   NNACL_CHECK_NULL_RETURN_ERR(bias_add);
40   self->thread_nr_ = self->UpdateThread(TC_PTYPE(PrimType_BiasAdd), BIAS_ADD_PER_UNIT_LOAD_NUM,
41                                         BIAS_ADD_PER_UNIT_STORE_NUM, bias_add->total_num_, self->thread_nr_);
42   if (self->thread_nr_ > SPLIT_POINTS_SIZE) {
43     self->thread_nr_ = SPLIT_POINTS_SIZE;
44   }
45 
46   bias_add->split_pionts_size_ = 0;
47   int64_t block_size = 1;
48   block_size = bias_add->total_num_ / self->thread_nr_;
49   int64_t remain_data = bias_add->total_num_ - block_size * self->thread_nr_;
50   int64_t split_point = 0;
51   while (split_point < bias_add->total_num_) {
52     bias_add->split_points_[bias_add->split_pionts_size_++] = split_point;
53     split_point += block_size;
54     if (remain_data > 0) {
55       ++split_point;
56       --remain_data;
57     }
58   }
59   self->thread_nr_ = bias_add->split_pionts_size_;
60   if (bias_add->inner_num_ >= C64NUM && block_size / bias_add->inner_num_ >= C6NUM) {
61     bias_add->batch_priority_ = true;
62   } else {
63     bias_add->batch_priority_ = false;
64   }
65   return NNACL_OK;
66 }
67 
BiasRun(void * cdata,int task_id,float l,float r)68 int BiasRun(void *cdata, int task_id, float l, float r) {
69   BiasAddStruct *bias_add = (BiasAddStruct *)cdata;
70   NNACL_CHECK_NULL_RETURN_ERR(bias_add);
71 
72   float *input = (float *)(bias_add->base_.in_[FIRST_INPUT]->data_);
73   NNACL_CHECK_NULL_RETURN_ERR(input);
74   float *bias = (float *)(bias_add->base_.in_[SECOND_INPUT]->data_);
75   NNACL_CHECK_NULL_RETURN_ERR(bias);
76   float *output = (float *)(bias_add->base_.out_[FIRST_INPUT]->data_);
77   NNACL_CHECK_NULL_RETURN_ERR(output);
78 
79   int64_t block_start = bias_add->split_points_[task_id];
80   int64_t block_end = bias_add->total_num_;
81   if ((task_id + 1) < bias_add->split_pionts_size_) {
82     block_end = bias_add->split_points_[task_id + 1];
83   }
84   BiasAddOpt(input, bias, output, block_start, block_end, bias_add->inner_num_, bias_add->batch_priority_);
85   return NNACL_OK;
86 }
87 
BiasAddResize(struct KernelBase * self)88 int BiasAddResize(struct KernelBase *self) {
89   BiasAddStruct *bias_add = (BiasAddStruct *)self;
90   NNACL_CHECK_NULL_RETURN_ERR(bias_add);
91 
92   TensorC *in_tensor = self->in_[FIRST_INPUT];
93   TensorC *add_tensor = self->in_[SECOND_INPUT];
94   NNACL_CHECK_FALSE(in_tensor->shape_size_ == 0, NNACL_ERR);
95   NNACL_CHECK_FALSE(add_tensor->shape_size_ == 0, NNACL_ERR);
96   NNACL_CHECK_FALSE(in_tensor->shape_size_ < add_tensor->shape_size_, NNACL_ERR);
97 
98   size_t dim_offset = in_tensor->shape_size_ - add_tensor->shape_size_;
99   bias_add->inner_num_ = 1;
100   for (size_t i = 0; i < add_tensor->shape_size_; ++i) {
101     NNACL_CHECK_FALSE(in_tensor->shape_[i + dim_offset] != add_tensor->shape_[i], NNACL_BIAS_ADD_SHAPE_NOT_MATCH);
102     NNACL_CHECK_FALSE(INT_MUL_OVERFLOW(in_tensor->shape_[i], bias_add->inner_num_), NNACL_BIAS_ADD_SHAPE_OVERFLOW);
103     bias_add->inner_num_ *= add_tensor->shape_[i];
104   }
105 
106   bias_add->outer_num_ = 1;
107   for (size_t i = 0; i < dim_offset; ++i) {
108     NNACL_CHECK_FALSE(INT_MUL_OVERFLOW(in_tensor->shape_[i], bias_add->outer_num_), NNACL_BIAS_ADD_SHAPE_OVERFLOW);
109     bias_add->outer_num_ *= in_tensor->shape_[i];
110   }
111 
112   NNACL_CHECK_FALSE(INT_MUL_OVERFLOW(bias_add->inner_num_, bias_add->outer_num_), NNACL_BIAS_ADD_SHAPE_OVERFLOW);
113   bias_add->total_num_ = bias_add->inner_num_ * bias_add->outer_num_;
114   return ChooseBiasThreadCuttingStrategy(self);
115 }
116 
BiasAddCompute(struct KernelBase * self)117 int BiasAddCompute(struct KernelBase *self) {
118   return self->env_->ParallelLaunch(self->env_->thread_pool_, BiasRun, self, self->thread_nr_);
119 }
120 
CreateBiasAdd(OpParameter * param,int data_type)121 KernelBase *CreateBiasAdd(OpParameter *param, int data_type) {
122   BiasAddStruct *bias_add = (BiasAddStruct *)malloc(sizeof(BiasAddStruct));
123   NNACL_MALLOC_CHECK_NULL_RETURN_NULL(bias_add);
124   bias_add->base_.Prepare = DefaultPrepare2In1Out;
125   bias_add->base_.Resize = BiasAddResize;
126   bias_add->base_.Release = DefaultRelease;
127   bias_add->base_.Compute = BiasAddCompute;
128   return (KernelBase *)bias_add;
129 }
130 
131 REG_KERNEL_CREATOR(PrimType_BiasAdd, kNumberTypeFloat32, CreateBiasAdd)
132