/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/runtime/kernel/arm/fp16/scale_fp16.h"
#include <cstring>
#include <vector>
#include "schema/model_generated.h"
#include "src/kernel_registry.h"
#include "include/errorcode.h"
#include "src/runtime/kernel/arm/fp16/common_fp16.h"
#include "nnacl/fp16/scale_fp16.h"
#include "nnacl/fp16/cast_fp16.h"

using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_ScaleFusion;

namespace mindspore::kernel {
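// Determine whether temporary fp16 buffers must be allocated: fp32 scale/offset
// inputs need conversion, and a missing offset input needs a zero-filled buffer.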
int ScaleFp16CPUKernel::InitScaleOffset() {
  auto scale_tensor = in_tensors_.at(1);
  malloc_scale_ = scale_tensor->data_type() == kNumberTypeFloat32;

  if (in_tensors_.size() == 2) {
    malloc_offset_ = true;
  } else {
    auto offset_tensor = in_tensors_.at(2);
    malloc_offset_ = offset_tensor->data_type() == kNumberTypeFloat32;
  }
  return RET_OK;
}

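// Validate the tensor counts up front; ReSize() is deferred until shape inference completes.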
int ScaleFp16CPUKernel::Init() {
  if (in_tensors_.size() < 2 || in_tensors_.size() > 3) {
    MS_LOG(ERROR) << "Scale operator expects 2 or 3 inputs, but got " << in_tensors_.size() << ".";
    return RET_ERROR;
  }
  CHECK_LESS_RETURN(out_tensors_.size(), 1);

  if (!InferShapeDone()) {
    return RET_OK;
  }
  auto ret = ReSize();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Scale fp16 ReSize failed.";
    return RET_ERROR;
  }
  return RET_OK;
}

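// Recalculate the scale parameters (split axis and block sizes) from the current input shapes.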
int ScaleFp16CPUKernel::ReSize() {
  auto ret = CalculateParameter();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Scale fp16 CalculateParameter failed.";
    return RET_ERROR;
  }

  return RET_OK;
}

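// Worker body executed by each thread: applies scale and offset with the fused activation, if any.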
int ScaleFp16CPUKernel::Scale(int task_id) {
  switch (scale_param_->activation_type_) {
    case schema::ActivationType_RELU6:
      DoScaleRelu6Fp16(input_, output_, scale_, offset_, task_id, scale_param_);
      break;
    case schema::ActivationType_RELU:
      Fp16DoScaleRelu(input_, output_, scale_, offset_, task_id, scale_param_);
      break;
    case schema::ActivationType_NO_ACTIVATION:
      DoScaleFp16(input_, output_, scale_, offset_, task_id, scale_param_);
      break;
    default:
      MS_LOG(ERROR) << "ScaleFp16 does not support activation type " << scale_param_->activation_type_;
      return RET_ERROR;
  }
  return RET_OK;
}

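// C-style trampoline handed to ParallelLaunch; dispatches each task to the kernel's Scale().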
int ScaleFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
  auto scale = reinterpret_cast<ScaleFp16CPUKernel *>(cdata);
  auto ret = scale->Scale(task_id);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "ScaleFp16Run error, task_id[" << task_id << "] error_code[" << ret << "]";
    return RET_ERROR;
  }
  return RET_OK;
}

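// Entry point: fetch fp16 input/output pointers, prepare scale/offset buffers,
// run the scaling in parallel, then release any temporary buffers.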
int ScaleFp16CPUKernel::Run() {
  auto input_tensor = in_tensors_.at(0);
  auto output_tensor = out_tensors_.at(0);
  CHECK_NULL_RETURN(input_tensor);
  CHECK_NULL_RETURN(output_tensor);
  input_ = reinterpret_cast<float16_t *>(input_tensor->data());
  output_ = reinterpret_cast<float16_t *>(output_tensor->data());
  CHECK_NULL_RETURN(input_);
  CHECK_NULL_RETURN(output_);
  auto ret = InitScaleOffset();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Scale fp16 InitScaleOffset failed.";
    return RET_ERROR;
  }

  ret = MallocAssignTmpBuffer();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Scale fp16 malloc tmp buffer failed.";
    FreeTmpBuffer();
    return ret;
  }

  ret = ParallelLaunch(this->ms_context_, ScaleFp16Run, this, op_parameter_->thread_num_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Scale fp16 ParallelLaunch failed, error_code[" << ret << "]";
    FreeTmpBuffer();
    return RET_ERROR;
  }

  FreeTmpBuffer();
  return RET_OK;
}

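// Obtain fp16 views of scale and offset: convert fp32 inputs, or allocate a
// zero-filled offset matching the scale tensor's size when no offset input exists.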
int ScaleFp16CPUKernel::MallocAssignTmpBuffer() {
  scale_ = ConvertInputFp32toFp16(in_tensors_.at(1), static_cast<const lite::InnerContext *>(this->ms_context_));
  if (scale_ == nullptr) {
    return RET_ERROR;
  }
  if (in_tensors_.size() == 3) {
    offset_ = ConvertInputFp32toFp16(in_tensors_.at(2), static_cast<const lite::InnerContext *>(this->ms_context_));
    if (offset_ == nullptr) {
      return RET_ERROR;
    }
  } else {
    MS_CHECK_INT_MUL_NOT_OVERFLOW(in_tensors_.at(1)->ElementsNum(), sizeof(float16_t), RET_ERROR);
    offset_ = reinterpret_cast<float16_t *>(
      ms_context_->allocator->Malloc(in_tensors_.at(1)->ElementsNum() * sizeof(float16_t)));
    if (offset_ == nullptr) {
      MS_LOG(ERROR) << "Malloc data failed";
      return RET_ERROR;
    }
    memset(offset_, 0, in_tensors_.at(1)->ElementsNum() * sizeof(float16_t));
  }
  return RET_OK;
}

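// Release only the buffers this kernel allocated; fp16 inputs used in place are not owned.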
void ScaleFp16CPUKernel::FreeTmpBuffer() {
  if (malloc_scale_ && scale_ != nullptr) {
    ms_context_->allocator->Free(scale_);
    scale_ = nullptr;
  }
  if (malloc_offset_ && offset_ != nullptr) {
    ms_context_->allocator->Free(offset_);
    offset_ = nullptr;
  }
}

REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_ScaleFusion, LiteKernelCreator<ScaleFp16CPUKernel>)
}  // namespace mindspore::kernel