// NOTE(review): the six lines that were here ("Home / Line# / Scopes# / Navigate /
// Raw / Download") were code-browser navigation artifacts from extraction, not part
// of the source file; neutralized into a comment so the file remains well-formed.
1 /**
2  * Copyright 2020 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "src/runtime/kernel/arm/fp16/stack_fp16.h"
17 #include <vector>
18 #include "schema/model_generated.h"
19 #include "src/kernel_registry.h"
20 #include "nnacl/stack_parameter.h"
21 #include "include/errorcode.h"
22 #include "src/runtime/kernel/arm/fp16/common_fp16.h"
23 #include "nnacl/fp16/cast_fp16.h"
24 #include "nnacl/base/stack_base.h"
25 #include "nnacl/errorcode.h"
26 
27 using mindspore::lite::KernelRegistrar;
28 using mindspore::lite::RET_ERROR;
29 using mindspore::lite::RET_OK;
30 using mindspore::schema::PrimitiveType_Stack;
31 
32 namespace mindspore::kernel {
33 namespace {
34 constexpr int kStackStep = 64;
35 }  // namespace
36 
InitMallocFlags()37 void StackFp16CPUKernel::InitMallocFlags() {
38   malloc_buffers_.resize(in_tensors_.size());
39   for (size_t i = 0; i < in_tensors_.size(); ++i) {
40     malloc_buffers_.at(i) = in_tensors_.at(i)->data_type() == kNumberTypeFloat32;
41   }
42   malloc_out_ = out_tensors_.at(0)->data_type() == kNumberTypeFloat32;
43 }
44 
MallocAssignBuffer()45 int StackFp16CPUKernel::MallocAssignBuffer() {
46   buffers_.resize(in_tensors_.size(), nullptr);
47   for (size_t i = 0; i < in_tensors_.size(); ++i) {
48     buffers_.at(i) = reinterpret_cast<void *>(
49       ConvertInputFp32toFp16(in_tensors_.at(i), static_cast<const lite::InnerContext *>(ms_context_)));
50     if (buffers_.at(i) == nullptr) {
51       return RET_ERROR;
52     }
53   }
54 
55   out_buffer_ = nullptr;
56   out_buffer_ = MallocOutputFp16(out_tensors_.at(0), static_cast<const lite::InnerContext *>(this->ms_context_));
57   if (out_buffer_ == nullptr) {
58     return RET_ERROR;
59   }
60   return RET_OK;
61 }
62 
FreeBuffer()63 void StackFp16CPUKernel::FreeBuffer() {
64   for (size_t i = 0; i < buffers_.size(); ++i) {
65     if (malloc_buffers_.at(i) && buffers_.at(i) != nullptr) {
66       ms_context_->allocator->Free(buffers_.at(i));
67       buffers_.at(i) = nullptr;
68     }
69   }
70   if (malloc_out_ && out_buffer_ != nullptr) {
71     ms_context_->allocator->Free(out_buffer_);
72     out_buffer_ = nullptr;
73   }
74 }
75 
Init()76 int StackFp16CPUKernel::Init() {
77   CHECK_LESS_RETURN(in_tensors_.size(), 1);
78   CHECK_LESS_RETURN(out_tensors_.size(), 1);
79   data_type_size_ = sizeof(float16_t);
80   if (!InferShapeDone()) {
81     return RET_OK;
82   }
83   return ReSize();
84 }
85 
DoExecute(int task_id)86 int StackFp16CPUKernel::DoExecute(int task_id) {
87   auto inputs = buffers_.data();
88   void *output_data = reinterpret_cast<void *>(out_buffer_);
89   auto step = UP_DIV(outer_size_, num_threads_);
90   MS_CHECK_FALSE(INT_MUL_OVERFLOW(task_id, step), RET_ERROR);
91   auto start = task_id * step;
92   auto end = MSMIN(start + step, outer_size_);
93   auto input_num = in_tensors_.size();
94   MS_CHECK_FALSE(INT_MUL_OVERFLOW(input_num * start, copy_size_), RET_ERROR);
95   void *output = reinterpret_cast<char *>(output_data) + input_num * start * copy_size_;
96   Stack(inputs, reinterpret_cast<void *>(output), input_num, copy_size_, start, end);
97   return RET_OK;
98 }
99 
StackRun(void * cdata,int task_id,float lhs_scale,float rhs_scale)100 static int StackRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
101   auto stack = reinterpret_cast<StackFp16CPUKernel *>(cdata);
102   if (stack->DoExecute(task_id) != RET_OK) {
103     return RET_ERROR;
104   }
105   return RET_OK;
106 }
107 
Run()108 int StackFp16CPUKernel::Run() {
109   InitMallocFlags();
110   auto ret = MallocAssignBuffer();
111   if (ret != RET_OK) {
112     FreeBuffer();
113     return ret;
114   }
115   // run stack
116   num_threads_ = MSMIN(UP_DIV(outer_size_, kStackStep), this->op_parameter_->thread_num_);
117   ret = ParallelLaunch(this->ms_context_, StackRun, this, num_threads_);
118   if (ret != RET_OK) {
119     MS_LOG(ERROR) << "StackBaseCPUKernel Run error: error_code[" << ret << "]";
120     return RET_ERROR;
121   }
122   // if output tensor is fp32, we need to transform
123   if (malloc_out_) {
124     auto out_tensor = out_tensors_.at(0);
125     MS_ASSERT(out_tensor != nullptr);
126     MS_ASSERT(out_tensor->data() != nullptr);
127     Float16ToFloat32(out_buffer_, reinterpret_cast<float *>(out_tensor->data()), out_tensor->ElementsNum());
128   }
129   FreeBuffer();
130   return RET_OK;
131 }
132 
// Register this class as the fp16 CPU implementation of the Stack primitive.
REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Stack, LiteKernelCreator<StackFp16CPUKernel>)
134 }  // namespace mindspore::kernel
135