1 /**
2 * Copyright 2023 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "nnacl/kernel/addn.h"
18 #include "nnacl/fp32/add_fp32.h"
19 #include "nnacl/tensor_c_utils.h"
20 #include "nnacl/kernel/default_kernel_base.h"
21 #ifdef ENABLE_FP16
22 #include "nnacl/fp16/arithmetic_fp16.h"
23 #endif
24
AddNLaunch(void * cdata,int task_id,float l,float r)25 int AddNLaunch(void *cdata, int task_id, float l, float r) {
26 AddNStruct *addn = (AddNStruct *)cdata;
27 NNACL_CHECK_NULL_RETURN_ERR(addn);
28
29 int count_per_thread = UP_DIV(addn->elements_num_, addn->base_.thread_nr_);
30 NNACL_CHECK_INT_MUL_NOT_OVERFLOW(task_id, count_per_thread, NNACL_ERR);
31 int count = MSMIN(count_per_thread, addn->elements_num_ - task_id * count_per_thread);
32 int stride = count_per_thread * task_id;
33
34 #ifdef ENABLE_FP16
35 if (addn->data_type_ == kNumberTypeFloat16) {
36 return ElementAddFp16((float16_t *)addn->in1_addr_ + stride, (float16_t *)addn->in2_addr_ + stride,
37 (float16_t *)addn->out_addr_ + stride, count);
38 }
39 #endif
40 return ElementAdd((float *)addn->in1_addr_ + stride, (float *)addn->in2_addr_ + stride,
41 (float *)addn->out_addr_ + stride, count);
42 }
43
// Performs one pairwise add of in1_addr_ and in2_addr_ into out_addr_.
// same_shape selects the plain element-wise kernel; otherwise the broadcast
// ("opt") kernel is used, with first_scalar marking which operand is scalar.
void AddNCompute(AddNStruct *addn, bool same_shape, bool first_scalar) {
#ifdef ENABLE_FP16
  if (addn->data_type_ == kNumberTypeFloat16) {
    float16_t *lhs = (float16_t *)addn->in1_addr_;
    float16_t *rhs = (float16_t *)addn->in2_addr_;
    float16_t *dst = (float16_t *)addn->out_addr_;
    if (same_shape) {
      ElementAddFp16(lhs, rhs, dst, addn->elements_num_);
    } else {
      ElementOptAddFp16(lhs, rhs, dst, addn->elements_num_, first_scalar);
    }
    return;
  }
#endif

  float *lhs = (float *)addn->in1_addr_;
  float *rhs = (float *)addn->in2_addr_;
  float *dst = (float *)addn->out_addr_;
  if (same_shape) {
    ElementAdd(lhs, rhs, dst, addn->elements_num_);
  } else {
    ElementOptAdd(lhs, rhs, dst, addn->elements_num_, first_scalar);
  }
}
66
// Single-threaded AddN: folds all inputs into the output two at a time —
// out = in0 + in1 first, then out += in_i for every remaining input.
int AddNComputeNoParallel(AddNStruct *addn) {
  TensorC *first = addn->base_.in_[FIRST_INPUT];
  NNACL_CHECK_NULL_RETURN_ERR(first);
  TensorC *second = addn->base_.in_[SECOND_INPUT];
  NNACL_CHECK_NULL_RETURN_ERR(second);
  AddNCompute(addn, IsShapeSame(first, second), GetElementNum(first) == 1);

  TensorC *out = addn->base_.out_[OUTPUT_INDEX];
  for (size_t i = Index2; i < addn->base_.in_size_; i++) {
    TensorC *input = addn->base_.in_[i];
    NNACL_CHECK_NULL_RETURN_ERR(input);
    // Accumulate in place: previous partial sum becomes the second operand.
    addn->in1_addr_ = input->data_;
    addn->in2_addr_ = addn->out_addr_;
    AddNCompute(addn, IsShapeSame(input, out), GetElementNum(input) == 1);
  }
  return NNACL_OK;
}
83
AddnResize(struct KernelBase * self)84 int AddnResize(struct KernelBase *self) {
85 AddNStruct *addn = (AddNStruct *)self;
86 NNACL_CHECK_NULL_RETURN_ERR(addn);
87
88 TensorC *out_tensor = self->out_[OUTPUT_INDEX];
89 addn->elements_num_ = GetElementNum(out_tensor);
90 return NNACL_OK;
91 }
92
AddnCompute(struct KernelBase * self)93 int AddnCompute(struct KernelBase *self) {
94 AddNStruct *addn = (AddNStruct *)self;
95 NNACL_CHECK_NULL_RETURN_ERR(addn);
96
97 addn->in1_addr_ = self->in_[FIRST_INPUT]->data_;
98 NNACL_CHECK_NULL_RETURN_ERR(addn->in1_addr_);
99 addn->in2_addr_ = self->in_[SECOND_INPUT]->data_;
100 NNACL_CHECK_NULL_RETURN_ERR(addn->in2_addr_);
101 addn->out_addr_ = self->out_[OUTPUT_INDEX]->data_;
102 NNACL_CHECK_NULL_RETURN_ERR(addn->out_addr_);
103
104 if (addn->elements_num_ < self->thread_nr_) {
105 return AddNComputeNoParallel(addn);
106 }
107
108 for (int i = 0; i < self->in_size_; i++) {
109 TensorC *in_tensor = self->in_[i];
110 if (!IsShapeSame(in_tensor, self->out_[OUTPUT_INDEX])) {
111 return NNACL_ADDN_SHAPE_UNMATCH;
112 }
113 }
114
115 int ret = self->env_->ParallelLaunch(self->env_->thread_pool_, AddNLaunch, self, self->thread_nr_);
116 if (ret != NNACL_OK) {
117 return ret;
118 }
119
120 for (size_t i = Index2; i < self->in_size_; ++i) {
121 addn->in1_addr_ = self->in_[i]->data_;
122 NNACL_CHECK_NULL_RETURN_ERR(addn->in1_addr_);
123 addn->in2_addr_ = addn->out_addr_;
124 ret = self->env_->ParallelLaunch(self->env_->thread_pool_, AddNLaunch, self, self->thread_nr_);
125 if (ret != NNACL_OK) {
126 return ret;
127 }
128 }
129 return NNACL_OK;
130 }
131
CreateAddN(OpParameter * param,int data_type)132 KernelBase *CreateAddN(OpParameter *param, int data_type) {
133 AddNStruct *addn = (AddNStruct *)malloc(sizeof(AddNStruct));
134 NNACL_MALLOC_CHECK_NULL_RETURN_NULL(addn);
135 addn->data_type_ = data_type;
136 addn->base_.Prepare = DefaultPrepare1In1Out;
137 addn->base_.Resize = AddnResize;
138 addn->base_.Release = DefaultRelease;
139 addn->base_.Compute = AddnCompute;
140 return (KernelBase *)addn;
141 }
142
// Register the same factory for both supported element types; CreateAddN
// dispatches on its data_type argument at kernel-creation time.
REG_KERNEL_CREATOR(PrimType_AddN, kNumberTypeFloat16, CreateAddN)
REG_KERNEL_CREATOR(PrimType_AddN, kNumberTypeFloat32, CreateAddN)
145