• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "src/runtime/kernel/arm/base/stack_base.h"
17 #include <vector>
18 #include "schema/model_generated.h"
19 #include "src/kernel_registry.h"
20 #include "nnacl/base/stack_base.h"
21 #include "nnacl/stack_parameter.h"
22 #include "include/errorcode.h"
23 #include "nnacl/errorcode.h"
24 
25 using mindspore::lite::KernelRegistrar;
26 using mindspore::lite::RET_ERROR;
27 using mindspore::lite::RET_NULL_PTR;
28 using mindspore::lite::RET_OK;
29 using mindspore::schema::PrimitiveType_Stack;
30 
31 namespace mindspore::kernel {
32 namespace {
33 constexpr int kStackStep = 64;
34 }  // namespace
35 
// Number of contiguous elements copied per stacking step along `axis`:
// the product of input dimensions from `axis` (or from 0 when axis <= 0)
// through the last dimension.
static int GetCopyNum(const std::vector<int> &in_shape, int axis, int n_dim) {
  const int first_dim = axis > 0 ? axis : 0;
  int block_elems = 1;
  for (int dim = first_dim; dim < n_dim; ++dim) {
    block_elems *= in_shape[dim];
  }
  return block_elems;
}
49 
// Product of all dimensions preceding `axis` — the number of independent
// outer slices the stacked output is split into.
static inline int GetOuterSize(const std::vector<int> &in_shape, int axis) {
  int slice_count = 1;
  int dim = 0;
  while (dim < axis) {
    slice_count *= in_shape[dim];
    ++dim;
  }
  return slice_count;
}
57 
ReSize()58 int StackBaseCPUKernel::ReSize() {
59   auto param = reinterpret_cast<StackParameter *>(op_parameter_);
60   auto input0_shape = in_tensors_.front()->shape();
61   axis_ = param->axis_ < 0 ? param->axis_ + input0_shape.size() + 1 : param->axis_;
62   auto input_nums = in_tensors_.size();
63   if (input_nums == 1) {
64     copy_size_ = in_tensors_.front()->ElementsNum() * data_type_size_;
65   } else {
66     MS_ASSERT(input_nums > 1);
67     CHECK_LESS_RETURN(input0_shape.size(), static_cast<size_t>(axis_));
68     copy_size_ = GetCopyNum(input0_shape, axis_, input0_shape.size()) * data_type_size_;
69     outer_size_ = GetOuterSize(input0_shape, axis_);
70   }
71   return RET_OK;
72 }
73 
Init()74 int StackBaseCPUKernel::Init() {
75   CHECK_LESS_RETURN(in_tensors_.size(), 1);
76   CHECK_LESS_RETURN(out_tensors_.size(), 1);
77   data_type_size_ = sizeof(float);
78   if (!InferShapeDone()) {
79     return RET_OK;
80   }
81   return ReSize();
82 }
83 
StackExecute(int task_id)84 int StackBaseCPUKernel::StackExecute(int task_id) {
85   auto output_data = reinterpret_cast<void *>(out_tensors_.at(0)->data());
86   if (output_data == nullptr) {
87     return RET_NULL_PTR;
88   }
89   MS_CHECK_TRUE_RET(num_threads_ != 0, RET_ERROR);
90   auto step = UP_DIV(outer_size_, num_threads_);
91   MS_CHECK_FALSE(INT_MUL_OVERFLOW(task_id, step), RET_ERROR);
92   auto start = task_id * step;
93   auto end = MSMIN(start + step, outer_size_);
94   auto input_num = in_tensors_.size();
95   MS_CHECK_FALSE(INT_MUL_OVERFLOW(input_num * start, copy_size_), RET_ERROR);
96   auto output = reinterpret_cast<char *>(output_data) + input_num * start * copy_size_;
97   Stack(all_inputs_, reinterpret_cast<void *>(output), input_num, copy_size_, start, end);
98   return RET_OK;
99 }
100 
StackRun(void * cdata,int task_id,float lhs_scale,float rhs_scale)101 static int StackRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
102   CHECK_NULL_RETURN(cdata);
103   auto stack = reinterpret_cast<StackBaseCPUKernel *>(cdata);
104   if (stack->StackExecute(task_id) != RET_OK) {
105     return RET_ERROR;
106   }
107   return RET_OK;
108 }
109 
Run()110 int StackBaseCPUKernel::Run() {
111   // malloc temporary memory to store all the inputs
112   size_t inputs_num = in_tensors_.size();
113   all_inputs_ = static_cast<void **>(ms_context_->allocator->Malloc(inputs_num * sizeof(void *)));
114   if (all_inputs_ == nullptr) {
115     MS_LOG(ERROR) << "malloc all_inputs failed.";
116     return RET_ERROR;
117   }
118   for (size_t j = 0; j < inputs_num; ++j) {
119     auto input_data = reinterpret_cast<void *>(in_tensors_.at(j)->data());
120     if (input_data == nullptr) {
121       return RET_NULL_PTR;
122     }
123     all_inputs_[j] = input_data;
124   }
125   // run stack
126   num_threads_ = MSMIN(UP_DIV(outer_size_, kStackStep), op_parameter_->thread_num_);
127   auto ret = ParallelLaunch(this->ms_context_, StackRun, this, num_threads_);
128   if (ret != RET_OK) {
129     MS_LOG(ERROR) << "StackBaseCPUKernel Run error: error_code[" << ret << "]";
130     return RET_ERROR;
131   }
132 
133   // free temporary variable all_inputs
134   ms_context_->allocator->Free(all_inputs_);
135   all_inputs_ = nullptr;
136   return RET_OK;
137 }
138 
139 REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Stack, LiteKernelCreator<StackBaseCPUKernel>)
140 REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_Stack, LiteKernelCreator<StackBaseCPUKernel>)
141 }  // namespace mindspore::kernel
142