• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2023 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "nnacl/kernel/addn.h"
18 #include "nnacl/fp32/add_fp32.h"
19 #include "nnacl/tensor_c_utils.h"
20 #include "nnacl/kernel/default_kernel_base.h"
21 #ifdef ENABLE_FP16
22 #include "nnacl/fp16/arithmetic_fp16.h"
23 #endif
24 
AddNLaunch(void * cdata,int task_id,float l,float r)25 int AddNLaunch(void *cdata, int task_id, float l, float r) {
26   AddNStruct *addn = (AddNStruct *)cdata;
27   NNACL_CHECK_NULL_RETURN_ERR(addn);
28 
29   int count_per_thread = UP_DIV(addn->elements_num_, addn->base_.thread_nr_);
30   NNACL_CHECK_INT_MUL_NOT_OVERFLOW(task_id, count_per_thread, NNACL_ERR);
31   int count = MSMIN(count_per_thread, addn->elements_num_ - task_id * count_per_thread);
32   int stride = count_per_thread * task_id;
33 
34 #ifdef ENABLE_FP16
35   if (addn->data_type_ == kNumberTypeFloat16) {
36     return ElementAddFp16((float16_t *)addn->in1_addr_ + stride, (float16_t *)addn->in2_addr_ + stride,
37                           (float16_t *)addn->out_addr_ + stride, count);
38   }
39 #endif
40   return ElementAdd((float *)addn->in1_addr_ + stride, (float *)addn->in2_addr_ + stride,
41                     (float *)addn->out_addr_ + stride, count);
42 }
43 
/* Dispatch a single pairwise add (in1 + in2 -> out) over the whole buffer.
 * `same_shape` selects the plain element-wise kernel; otherwise the "opt"
 * kernel broadcasts one scalar operand, with `first_scalar` telling it which
 * side is the scalar. The data type stored in the struct picks fp16 vs fp32. */
void AddNCompute(AddNStruct *addn, bool same_shape, bool first_scalar) {
  void *lhs = addn->in1_addr_;
  void *rhs = addn->in2_addr_;
  void *dst = addn->out_addr_;
  int total = addn->elements_num_;

#ifdef ENABLE_FP16
  if (addn->data_type_ == kNumberTypeFloat16) {
    if (same_shape) {
      ElementAddFp16((float16_t *)lhs, (float16_t *)rhs, (float16_t *)dst, total);
    } else {
      ElementOptAddFp16((float16_t *)lhs, (float16_t *)rhs, (float16_t *)dst, total, first_scalar);
    }
    return;
  }
#endif

  if (same_shape) {
    ElementAdd((float *)lhs, (float *)rhs, (float *)dst, total);
  } else {
    ElementOptAdd((float *)lhs, (float *)rhs, (float *)dst, total, first_scalar);
  }
}
66 
/* Single-threaded AddN: accumulate all inputs into the output tensor, two at a
 * time. Used when there are too few elements to be worth parallelising.
 * Returns NNACL_OK on success, an NNACL error code on a NULL tensor. */
int AddNComputeNoParallel(AddNStruct *addn) {
  TensorC *first = addn->base_.in_[FIRST_INPUT];
  NNACL_CHECK_NULL_RETURN_ERR(first);
  TensorC *second = addn->base_.in_[SECOND_INPUT];
  NNACL_CHECK_NULL_RETURN_ERR(second);

  /* Seed the accumulator: out = in0 + in1. */
  AddNCompute(addn, IsShapeSame(first, second), GetElementNum(first) == 1);

  /* Fold every remaining input into the running sum held in the output. */
  TensorC *out = addn->base_.out_[OUTPUT_INDEX];
  for (size_t i = Index2; i < addn->base_.in_size_; i++) {
    TensorC *cur = addn->base_.in_[i];
    NNACL_CHECK_NULL_RETURN_ERR(cur);
    addn->in1_addr_ = cur->data_;
    addn->in2_addr_ = addn->out_addr_;
    AddNCompute(addn, IsShapeSame(cur, out), GetElementNum(cur) == 1);
  }
  return NNACL_OK;
}
83 
AddnResize(struct KernelBase * self)84 int AddnResize(struct KernelBase *self) {
85   AddNStruct *addn = (AddNStruct *)self;
86   NNACL_CHECK_NULL_RETURN_ERR(addn);
87 
88   TensorC *out_tensor = self->out_[OUTPUT_INDEX];
89   addn->elements_num_ = GetElementNum(out_tensor);
90   return NNACL_OK;
91 }
92 
AddnCompute(struct KernelBase * self)93 int AddnCompute(struct KernelBase *self) {
94   AddNStruct *addn = (AddNStruct *)self;
95   NNACL_CHECK_NULL_RETURN_ERR(addn);
96 
97   addn->in1_addr_ = self->in_[FIRST_INPUT]->data_;
98   NNACL_CHECK_NULL_RETURN_ERR(addn->in1_addr_);
99   addn->in2_addr_ = self->in_[SECOND_INPUT]->data_;
100   NNACL_CHECK_NULL_RETURN_ERR(addn->in2_addr_);
101   addn->out_addr_ = self->out_[OUTPUT_INDEX]->data_;
102   NNACL_CHECK_NULL_RETURN_ERR(addn->out_addr_);
103 
104   if (addn->elements_num_ < self->thread_nr_) {
105     return AddNComputeNoParallel(addn);
106   }
107 
108   for (int i = 0; i < self->in_size_; i++) {
109     TensorC *in_tensor = self->in_[i];
110     if (!IsShapeSame(in_tensor, self->out_[OUTPUT_INDEX])) {
111       return NNACL_ADDN_SHAPE_UNMATCH;
112     }
113   }
114 
115   int ret = self->env_->ParallelLaunch(self->env_->thread_pool_, AddNLaunch, self, self->thread_nr_);
116   if (ret != NNACL_OK) {
117     return ret;
118   }
119 
120   for (size_t i = Index2; i < self->in_size_; ++i) {
121     addn->in1_addr_ = self->in_[i]->data_;
122     NNACL_CHECK_NULL_RETURN_ERR(addn->in1_addr_);
123     addn->in2_addr_ = addn->out_addr_;
124     ret = self->env_->ParallelLaunch(self->env_->thread_pool_, AddNLaunch, self, self->thread_nr_);
125     if (ret != NNACL_OK) {
126       return ret;
127     }
128   }
129   return NNACL_OK;
130 }
131 
CreateAddN(OpParameter * param,int data_type)132 KernelBase *CreateAddN(OpParameter *param, int data_type) {
133   AddNStruct *addn = (AddNStruct *)malloc(sizeof(AddNStruct));
134   NNACL_MALLOC_CHECK_NULL_RETURN_NULL(addn);
135   addn->data_type_ = data_type;
136   addn->base_.Prepare = DefaultPrepare1In1Out;
137   addn->base_.Resize = AddnResize;
138   addn->base_.Release = DefaultRelease;
139   addn->base_.Compute = AddnCompute;
140   return (KernelBase *)addn;
141 }
142 
143 REG_KERNEL_CREATOR(PrimType_AddN, kNumberTypeFloat16, CreateAddN)
144 REG_KERNEL_CREATOR(PrimType_AddN, kNumberTypeFloat32, CreateAddN)
145