/**
 * Copyright 2023 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "nnacl/kernel/stack.h"
#include "nnacl/op_base.h"
#include "nnacl/stack_parameter.h"
#include "nnacl/nnacl_common.h"
#include "nnacl/base/stack_base.h"
#include "nnacl/tensor_c_utils.h"

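/* Number of elements copied as one contiguous block per outer index: the
 * product of the input dimensions from `axis` onward, or of every dimension
 * when axis <= 0. */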
static inline int GetCopyNum(const int *in_shape, int axis, int n_dim) {
  int copy_num = 1;
  if (axis > 0) {
    for (int j = n_dim - 1; j > axis - 1; j--) {
      copy_num *= in_shape[j];
    }
  } else {
    for (int i = 0; i < n_dim; ++i) {
      copy_num *= in_shape[i];
    }
  }
  return copy_num;
}

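/* Number of outer slices to iterate over: the product of the input
 * dimensions before `axis`. */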
static inline int GetOuterSize(const int *in_shape, int axis) {
  int outer_size = 1;
  for (int i = 0; i < axis; ++i) {
    outer_size *= in_shape[i];
  }
  return outer_size;
}

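/* Releases the pointer table allocated in StackPrepare. */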
int StackRelease(KernelBase *self) {
  StackStruct *stack = (StackStruct *)self;
  NNACL_CHECK_NULL_RETURN_ERR(stack);
  if (stack->buffers_ != NULL) {
    self->env_->Free(self->env_->allocator_, stack->buffers_);
    stack->buffers_ = NULL;
  }
  return NNACL_OK;
}

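/* Allocates one pointer slot per input and output tensor; StackCompute fills
 * the table with the tensors' raw data pointers before each launch. */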
int StackPrepare(KernelBase *self) {
  StackStruct *stack = (StackStruct *)self;
  NNACL_CHECK_NULL_RETURN_ERR(stack);
  NNACL_CHECK_FALSE(self->in_size_ < ONE_TENSOR, NNACL_ERR);
  NNACL_CHECK_FALSE(self->out_size_ < ONE_TENSOR, NNACL_ERR);
  stack->buffers_ =
    (void **)self->env_->Alloc(self->env_->allocator_, (self->in_size_ + self->out_size_) * sizeof(void *));
  NNACL_MALLOC_CHECK_NULL_RETURN_ERR(stack->buffers_);
  return NNACL_OK;
}

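/* Recomputes the normalized axis, the per-slice copy size in bytes, and the
 * outer loop count from the first input's shape, then tunes the thread count. */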
int StackResize(KernelBase *self) {
  StackStruct *stack = (StackStruct *)self;
  NNACL_CHECK_NULL_RETURN_ERR(stack);
  TensorC *input = self->in_[FIRST_INPUT];
  NNACL_CHECK_NULL_RETURN_ERR(input);

  /* A negative axis is relative to the output rank, which is one larger than the input rank. */
  int origin_axis = ((StackParameter *)self->param_)->axis_;
  stack->axis_ = origin_axis < 0 ? origin_axis + (int)input->shape_size_ + 1 : origin_axis;

  if (self->in_size_ == 1) {
    /* With a single input the whole tensor is one contiguous copy. */
    NNACL_CHECK_FALSE(GetElementNum(input) <= 0, NNACL_STACK_TENSOR_SHAPE_INVALID);
    stack->copy_size_ = (size_t)GetElementNum(input) * DataTypeCSize(stack->data_type_);
    stack->outer_size_ = 1;
  } else {
    NNACL_CHECK_FALSE((int)input->shape_size_ < stack->axis_, NNACL_STACK_TENSOR_SHAPE_INVALID);
    size_t copy_num = (size_t)GetCopyNum(input->shape_, stack->axis_, input->shape_size_);
    stack->copy_size_ = copy_num * DataTypeCSize(stack->data_type_);
    stack->outer_size_ = GetOuterSize(input->shape_, stack->axis_);
  }

  self->thread_nr_ = self->UpdateThread(TC_PTYPE(PrimType_Stack), stack->copy_size_, stack->copy_size_,
                                        GetElementNum(self->out_[OUTPUT_INDEX]), self->thread_nr_);
  /* Do not use more threads than there are NNACL_STACK_STEP-sized chunks of outer slices. */
  self->thread_nr_ = NNACL_MIN(UP_DIV(stack->outer_size_, NNACL_STACK_STEP), self->thread_nr_);
  return NNACL_OK;
}

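/* Parallel worker: copies the [start, end) range of outer slices from every
 * input buffer into the interleaved output layout via Stack() from stack_base. */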
int StackRun(void *cdata, int task_id, float l, float r) {
  StackStruct *stack = (StackStruct *)cdata;
  NNACL_CHECK_NULL_RETURN_ERR(stack);

  /* Split the outer slices evenly across tasks; the last task may get a shorter range. */
  NNACL_CHECK_TRUE_RET(stack->base_.thread_nr_ != 0, NNACL_ERR);
  int step = UP_DIV(stack->outer_size_, stack->base_.thread_nr_);
  NNACL_CHECK_INT_MUL_NOT_OVERFLOW(task_id, step, NNACL_ERR);
  int start = task_id * step;
  int end = NNACL_MIN(start + step, stack->outer_size_);
  NNACL_CHECK_INT_MUL_NOT_OVERFLOW(stack->base_.in_size_ * (size_t)start, stack->copy_size_, NNACL_ERR);

  /* Each outer index owns in_size_ consecutive blocks of copy_size_ bytes in the output. */
  void *output_data = (void *)(stack->base_.out_[OUTPUT_INDEX]->data_);
  NNACL_CHECK_NULL_RETURN_ERR(output_data);
  uint8_t *output = (uint8_t *)output_data + stack->base_.in_size_ * (size_t)start * stack->copy_size_;

  Stack(stack->buffers_, (void *)output, stack->base_.in_size_, stack->copy_size_, start, end);
  return NNACL_OK;
}

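/* Collects the input and output data pointers, then runs StackRun over the
 * thread pool. */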
int StackCompute(KernelBase *self) {
  StackStruct *stack = (StackStruct *)self;
  NNACL_CHECK_NULL_RETURN_ERR(stack);

  for (size_t i = 0; i < self->in_size_; ++i) {
    stack->buffers_[i] = self->in_[i]->data_;
    NNACL_CHECK_NULL_RETURN_ERR(stack->buffers_[i]);
  }
  stack->buffers_[self->in_size_] = self->out_[OUTPUT_INDEX]->data_;
  NNACL_CHECK_NULL_RETURN_ERR(stack->buffers_[self->in_size_]);
  return self->env_->ParallelLaunch(self->env_->thread_pool_, StackRun, self, self->thread_nr_);
}

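/* Kernel factory: allocates the StackStruct and wires the KernelBase
 * lifecycle callbacks. */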
KernelBase *CreateStack(OpParameter *param, int data_type) {
  StackStruct *stack = (StackStruct *)malloc(sizeof(StackStruct));
  NNACL_MALLOC_CHECK_NULL_RETURN_NULL(stack);
  stack->buffers_ = NULL;
  stack->data_type_ = data_type;
  stack->base_.Release = StackRelease;
  stack->base_.Prepare = StackPrepare;
  stack->base_.Resize = StackResize;
  stack->base_.Compute = StackCompute;
  return (KernelBase *)stack;
}

REG_KERNEL_CREATOR(PrimType_Stack, kNumberTypeFloat32, CreateStack)
REG_KERNEL_CREATOR(PrimType_Stack, kNumberTypeInt32, CreateStack)