/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 #include "src/runtime/kernel/arm/base/stack_base.h"
17 #include <vector>
18 #include "schema/model_generated.h"
19 #include "src/kernel_registry.h"
20 #include "nnacl/base/stack_base.h"
21 #include "nnacl/stack_parameter.h"
22 #include "include/errorcode.h"
23 #include "nnacl/errorcode.h"
24
25 using mindspore::lite::KernelRegistrar;
26 using mindspore::lite::RET_ERROR;
27 using mindspore::lite::RET_NULL_PTR;
28 using mindspore::lite::RET_OK;
29 using mindspore::schema::PrimitiveType_Stack;
30
31 namespace mindspore::kernel {
32 namespace {
33 constexpr int kStackStep = 64;
34 } // namespace
35
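// Number of elements copied as one contiguous block per input: the product of
// the dimensions from the stack axis to the innermost one, or the whole
// element count when the axis is 0.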
static int GetCopyNum(const std::vector<int> &in_shape, int axis, int n_dim) {
  int copy_num = 1;
  if (axis > 0) {
    for (int j = n_dim - 1; j > axis - 1; j--) {
      copy_num *= in_shape[j];
    }
  } else {
    for (int i = 0; i < n_dim; ++i) {
      copy_num *= in_shape[i];
    }
  }
  return copy_num;
}

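// Number of outer slices: the product of the dimensions before the stack
// axis. For example, stacking tensors of shape [2, 3] on axis 1 gives
// copy_num = 3 and outer_size = 2.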
static inline int GetOuterSize(const std::vector<int> &in_shape, int axis) {
  int outer_size = 1;
  for (int i = 0; i < axis; ++i) {
    outer_size *= in_shape[i];
  }
  return outer_size;
}

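// Recompute the copy geometry from the first input's shape. A negative axis is
// normalized against the output rank, which is the input rank plus one (Stack
// adds a dimension), hence the "+ 1" below.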
int StackBaseCPUKernel::ReSize() {
  auto param = reinterpret_cast<StackParameter *>(op_parameter_);
  auto input0_shape = in_tensors_.front()->shape();
  axis_ = param->axis_ < 0 ? param->axis_ + input0_shape.size() + 1 : param->axis_;
  auto input_nums = in_tensors_.size();
  if (input_nums == 1) {
    copy_size_ = in_tensors_.front()->ElementsNum() * data_type_size_;
  } else {
    MS_ASSERT(input_nums > 1);
    CHECK_LESS_RETURN(input0_shape.size(), static_cast<size_t>(axis_));
    copy_size_ = GetCopyNum(input0_shape, axis_, input0_shape.size()) * data_type_size_;
    outer_size_ = GetOuterSize(input0_shape, axis_);
  }
  return RET_OK;
}

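// Basic validation at init time. data_type_size_ defaults to 4 bytes
// (sizeof(float)), which also covers the int32 variant registered below.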
int StackBaseCPUKernel::Init() {
  CHECK_LESS_RETURN(in_tensors_.size(), 1);
  CHECK_LESS_RETURN(out_tensors_.size(), 1);
  data_type_size_ = sizeof(float);
  if (!InferShapeDone()) {
    return RET_OK;
  }
  return ReSize();
}

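// Per-thread worker: copies the slice range [start, end) of the outer
// dimension, interleaving one copy_size_-byte block from every input for each
// outer slice.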
int StackBaseCPUKernel::StackExecute(int task_id) {
  auto output_data = reinterpret_cast<void *>(out_tensors_.at(0)->data());
  if (output_data == nullptr) {
    return RET_NULL_PTR;
  }
  MS_CHECK_TRUE_RET(num_threads_ != 0, RET_ERROR);
  auto step = UP_DIV(outer_size_, num_threads_);
  MS_CHECK_FALSE(INT_MUL_OVERFLOW(task_id, step), RET_ERROR);
  auto start = task_id * step;
  auto end = MSMIN(start + step, outer_size_);
  auto input_num = in_tensors_.size();
  MS_CHECK_FALSE(INT_MUL_OVERFLOW(input_num * start, copy_size_), RET_ERROR);
  auto output = reinterpret_cast<char *>(output_data) + input_num * start * copy_size_;
  Stack(all_inputs_, reinterpret_cast<void *>(output), input_num, copy_size_, start, end);
  return RET_OK;
}

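// ParallelLaunch trampoline: forwards the task id to the kernel instance. The
// lhs_scale/rhs_scale arguments are part of the callback signature and are
// unused here.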
static int StackRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
  CHECK_NULL_RETURN(cdata);
  auto stack = reinterpret_cast<StackBaseCPUKernel *>(cdata);
  if (stack->StackExecute(task_id) != RET_OK) {
    return RET_ERROR;
  }
  return RET_OK;
}

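// Collects the raw input pointers into a temporary array, caps the thread
// count so each task handles at least kStackStep outer slices, and launches
// the parallel copy.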
int StackBaseCPUKernel::Run() {
  // malloc temporary memory to store the data pointers of all the inputs
  size_t inputs_num = in_tensors_.size();
  all_inputs_ = static_cast<void **>(ms_context_->allocator->Malloc(inputs_num * sizeof(void *)));
  if (all_inputs_ == nullptr) {
    MS_LOG(ERROR) << "malloc all_inputs failed.";
    return RET_ERROR;
  }
  for (size_t j = 0; j < inputs_num; ++j) {
    auto input_data = reinterpret_cast<void *>(in_tensors_.at(j)->data());
    if (input_data == nullptr) {
      // free the temporary array before bailing out to avoid leaking it
      ms_context_->allocator->Free(all_inputs_);
      all_inputs_ = nullptr;
      return RET_NULL_PTR;
    }
    all_inputs_[j] = input_data;
  }
  // run stack
  num_threads_ = MSMIN(UP_DIV(outer_size_, kStackStep), op_parameter_->thread_num_);
  auto ret = ParallelLaunch(this->ms_context_, StackRun, this, num_threads_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "StackBaseCPUKernel Run error: error_code[" << ret << "]";
    ms_context_->allocator->Free(all_inputs_);
    all_inputs_ = nullptr;
    return RET_ERROR;
  }

  // free temporary variable all_inputs
  ms_context_->allocator->Free(all_inputs_);
  all_inputs_ = nullptr;
  return RET_OK;
}

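// One kernel serves both registered data types: float32 and int32 elements
// are both 4 bytes wide, so the byte-copy path is identical.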
REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Stack, LiteKernelCreator<StackBaseCPUKernel>)
REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_Stack, LiteKernelCreator<StackBaseCPUKernel>)
}  // namespace mindspore::kernel