• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2023 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
#include <string.h>
#include "nnacl/kernel/stack.h"
#include "nnacl/op_base.h"
#include "nnacl/stack_parameter.h"
#include "nnacl/nnacl_common.h"
#include "nnacl/base/stack_base.h"
#include "nnacl/tensor_c_utils.h"
23 
/* Number of elements copied per input slice: the product of the dimensions at
 * and after `axis` (or the whole shape when axis <= 0). */
static inline int GetCopyNum(const int *in_shape, int axis, int n_dim) {
  int count = 1;
  if (axis <= 0) {
    for (int i = 0; i < n_dim; ++i) {
      count *= in_shape[i];
    }
  } else {
    for (int j = axis; j < n_dim; ++j) {
      count *= in_shape[j];
    }
  }
  return count;
}
37 
/* Product of the dimensions before `axis`: how many slices are stacked. */
static inline int GetOuterSize(const int *in_shape, int axis) {
  int product = 1;
  int i = 0;
  while (i < axis) {
    product *= in_shape[i++];
  }
  return product;
}
45 
/* Free the pointer-array scratch buffer allocated in StackPrepare, if any. */
int StackRelease(KernelBase *self) {
  StackStruct *stack = (StackStruct *)self;
  NNACL_CHECK_NULL_RETURN_ERR(stack);
  void **buffers = stack->buffers_;
  if (buffers != NULL) {
    self->env_->Free(self->env_->allocator_, buffers);
    stack->buffers_ = NULL;  /* avoid dangling pointer / double free */
  }
  return NNACL_OK;
}
55 
/* Allocate one pointer slot per input tensor plus one per output tensor;
 * StackCompute fills these with the tensors' data pointers each run. */
int StackPrepare(KernelBase *self) {
  StackStruct *stack = (StackStruct *)self;
  NNACL_CHECK_NULL_RETURN_ERR(stack);
  NNACL_CHECK_FALSE(self->in_size_ < ONE_TENSOR, NNACL_ERR);
  NNACL_CHECK_FALSE(self->out_size_ < ONE_TENSOR, NNACL_ERR);
  size_t slot_count = self->in_size_ + self->out_size_;
  stack->buffers_ = (void **)self->env_->Alloc(self->env_->allocator_, slot_count * sizeof(void *));
  NNACL_MALLOC_CHECK_NULL_RETURN_ERR(stack->buffers_);
  return NNACL_OK;
}
66 
/* Recompute per-slice copy size, outer loop count, and thread count from the
 * first input's shape and the (possibly negative) stack axis. */
int StackResize(KernelBase *self) {
  StackStruct *stack = (StackStruct *)self;
  NNACL_CHECK_NULL_RETURN_ERR(stack);
  TensorC *in0 = self->in_[FIRST_INPUT];
  NNACL_CHECK_NULL_RETURN_ERR(in0);

  /* Negative axes count from the end of the OUTPUT shape, which has one more
   * dimension than the input, hence the "+ 1". */
  int raw_axis = ((StackParameter *)self->param_)->axis_;
  stack->axis_ = raw_axis >= 0 ? raw_axis : raw_axis + (int)in0->shape_size_ + 1;

  if (self->in_size_ != 1) {
    NNACL_CHECK_FALSE((int)in0->shape_size_ < stack->axis_, NNACL_STACK_TENSOR_SHAPE_INVALID);
    size_t elems = (size_t)GetCopyNum(in0->shape_, stack->axis_, in0->shape_size_);
    stack->copy_size_ = elems * DataTypeCSize(stack->data_type_);
    stack->outer_size_ = GetOuterSize(in0->shape_, stack->axis_);
  } else {
    /* Single input: the whole tensor is copied once. */
    NNACL_CHECK_FALSE(GetElementNum(in0) <= 0, NNACL_STACK_TENSOR_SHAPE_INVALID);
    stack->copy_size_ = (size_t)GetElementNum(in0) * DataTypeCSize(stack->data_type_);
    stack->outer_size_ = 1;
  }

  self->thread_nr_ = self->UpdateThread(TC_PTYPE(PrimType_Stack), stack->copy_size_, stack->copy_size_,
                                        GetElementNum(self->out_[OUTPUT_INDEX]), self->thread_nr_);
  /* Never spawn more threads than there are NNACL_STACK_STEP-sized chunks. */
  self->thread_nr_ = NNACL_MIN(UP_DIV(stack->outer_size_, NNACL_STACK_STEP), self->thread_nr_);
  return NNACL_OK;
}
92 
StackRun(void * cdata,int task_id,float l,float r)93 int StackRun(void *cdata, int task_id, float l, float r) {
94   StackStruct *stack = (StackStruct *)cdata;
95   NNACL_CHECK_NULL_RETURN_ERR(stack);
96 
97   NNACL_CHECK_TRUE_RET(stack->base_.thread_nr_ != 0, NNACL_ERR);
98   int step = UP_DIV(stack->outer_size_, stack->base_.thread_nr_);
99   NNACL_CHECK_INT_MUL_NOT_OVERFLOW(task_id, step, NNACL_ERR);
100   int start = task_id * step;
101   int end = NNACL_MIN(start + step, stack->outer_size_);
102   NNACL_CHECK_INT_MUL_NOT_OVERFLOW(stack->base_.in_size_ * (size_t)start, stack->copy_size_, NNACL_ERR);
103 
104   void *output_data = (void *)(stack->base_.out_[OUTPUT_INDEX]->data_);
105   NNACL_CHECK_NULL_RETURN_ERR(output_data);
106   uint8_t *output = (uint8_t *)output_data + stack->base_.in_size_ * (size_t)start * stack->copy_size_;
107 
108   Stack(stack->buffers_, (void *)output, stack->base_.in_size_, stack->copy_size_, start, end);
109   return NNACL_OK;
110 }
111 
/* Gather all input data pointers plus the output pointer into buffers_, then
 * launch StackRun across the thread pool. */
int StackCompute(KernelBase *self) {
  StackStruct *stack = (StackStruct *)self;
  NNACL_CHECK_NULL_RETURN_ERR(stack);

  size_t i = 0;
  for (; i < self->in_size_; ++i) {
    stack->buffers_[i] = self->in_[i]->data_;
    NNACL_CHECK_NULL_RETURN_ERR(stack->buffers_[i]);
  }
  /* The slot after the inputs holds the output data pointer. */
  stack->buffers_[i] = self->out_[OUTPUT_INDEX]->data_;
  NNACL_CHECK_NULL_RETURN_ERR(stack->buffers_[i]);
  return self->env_->ParallelLaunch(self->env_->thread_pool_, StackRun, self, self->thread_nr_);
}
124 
CreateStack(OpParameter * param,int data_type)125 KernelBase *CreateStack(OpParameter *param, int data_type) {
126   StackStruct *stack = (StackStruct *)malloc(sizeof(StackStruct));
127   NNACL_MALLOC_CHECK_NULL_RETURN_NULL(stack);
128   stack->buffers_ = NULL;
129   stack->data_type_ = data_type;
130   stack->base_.Release = StackRelease;
131   stack->base_.Prepare = StackPrepare;
132   stack->base_.Resize = StackResize;
133   stack->base_.Compute = StackCompute;
134   return (KernelBase *)stack;
135 }
136 
137 REG_KERNEL_CREATOR(PrimType_Stack, kNumberTypeFloat32, CreateStack)
138 REG_KERNEL_CREATOR(PrimType_Stack, kNumberTypeInt32, CreateStack)
139