• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "src/runtime/kernel/arm/int8/sub_int8.h"
18 #include "src/kernel_registry.h"
19 #include "include/errorcode.h"
20 
21 using mindspore::lite::KernelRegistrar;
22 using mindspore::lite::RET_ERROR;
23 using mindspore::lite::RET_OK;
24 using mindspore::schema::PrimitiveType_SubFusion;
25 
26 namespace mindspore::kernel {
~SubInt8CPUKernel()27 SubInt8CPUKernel::~SubInt8CPUKernel() {
28   if (quant_param_ != nullptr) {
29     free(quant_param_);
30     quant_param_ = nullptr;
31   }
32 }
33 
Init()34 int SubInt8CPUKernel::Init() {
35   lite::Tensor *input0 = in_tensors_.at(0);
36   lite::Tensor *input1 = in_tensors_.at(1);
37   lite::Tensor *output = out_tensors_.at(0);
38   MS_ASSERT(input0);
39   MS_ASSERT(input1);
40   MS_ASSERT(output);
41 
42   broadcast_ = input0->ElementsNum() != input1->ElementsNum();
43 
44   quant_param_ = reinterpret_cast<SubQuantArg *>(malloc(sizeof(SubQuantArg)));
45   if (quant_param_ == nullptr) {
46     MS_LOG(ERROR) << "Malloc SubQuantArg for Sub int8 op failed!";
47     return RET_ERROR;
48   }
49   quant_param_->in0_args_.scale_ = input0->quant_params().front().scale;
50   quant_param_->in0_args_.zp_ = -input0->quant_params().front().zeroPoint;
51   quant_param_->in1_args_.scale_ = input1->quant_params().front().scale;
52   quant_param_->in1_args_.zp_ = -input1->quant_params().front().zeroPoint;
53   quant_param_->out_args_.scale_ = output->quant_params().front().scale;
54   quant_param_->out_args_.zp_ = output->quant_params().front().zeroPoint;
55 
56   const int left_shift = 20;
57   const double twice_max_input_scale = 2 * std::max(quant_param_->in0_args_.scale_, quant_param_->in1_args_.scale_);
58   const double real_input0_multiplier = quant_param_->in0_args_.scale_ / twice_max_input_scale;
59   const double real_input1_multiplier = quant_param_->in1_args_.scale_ / twice_max_input_scale;
60   const double real_output_multiplier = twice_max_input_scale / ((1 << left_shift) * quant_param_->out_args_.scale_);
61 
62   QuantizeMultiplierSmallerThanOne(real_input0_multiplier, &quant_param_->input0_multiplier_,
63                                    &quant_param_->input0_shift_);
64   QuantizeMultiplierSmallerThanOne(real_input1_multiplier, &quant_param_->input1_multiplier_,
65                                    &quant_param_->input1_shift_);
66   QuantizeMultiplierSmallerThanOne(real_output_multiplier, &quant_param_->output_multiplier_,
67                                    &quant_param_->output_shift_);
68 
69   quant_param_->output_activation_min_ = std::numeric_limits<int8_t>::min();
70   quant_param_->output_activation_max_ = std::numeric_limits<int8_t>::max();
71 
72   int left_shift0 = -quant_param_->input0_shift_ > 0 ? -quant_param_->input0_shift_ : 0;
73   quant_param_->right_shift0_ = -quant_param_->input0_shift_ > 0 ? 0 : quant_param_->input0_shift_;
74 
75   int left_shift1 = -quant_param_->input1_shift_ > 0 ? -quant_param_->input1_shift_ : 0;
76   quant_param_->right_shift1_ = -quant_param_->input1_shift_ > 0 ? 0 : quant_param_->input1_shift_;
77 
78   quant_param_->left_shift_out_ = -quant_param_->output_shift_ > 0 ? -quant_param_->output_shift_ : 0;
79   quant_param_->right_shift_out_ = -quant_param_->output_shift_ > 0 ? 0 : quant_param_->output_shift_;
80 
81   quant_param_->left_shift_result0_ = (1 << left_shift) * ((1 << left_shift0));
82   quant_param_->left_shift_result1_ = (1 << left_shift) * ((1 << left_shift1));
83 
84   MS_ASSERT(left_shift + left_shift0 == left_shift);
85   MS_ASSERT(left_shift + left_shift1 == left_shift);
86 
87   if (!InferShapeDone()) {
88     return RET_OK;
89   }
90   return ReSize();
91 }
92 
ReSize()93 int SubInt8CPUKernel::ReSize() { return RET_OK; }
94 
DoExecute(int task_id)95 int SubInt8CPUKernel::DoExecute(int task_id) {
96   auto input0_data_ = static_cast<int8_t *>(in_tensors_.at(0)->MutableData());
97   auto input1_data_ = static_cast<int8_t *>(in_tensors_.at(1)->MutableData());
98   auto output_data_ = static_cast<int8_t *>(out_tensors_.at(0)->MutableData());
99   auto element_num = out_tensors_[0]->ElementsNum();
100 
101   MS_ASSERT(op_parameter_->thread_num_ != 0);
102   int stride = UP_DIV(element_num, op_parameter_->thread_num_);
103   int count = MSMIN(stride, element_num - stride * task_id);
104   if (count <= 0) {
105     return RET_OK;
106   }
107 
108   auto ret = RET_OK;
109   if (broadcast_) {
110     ret = SubInt8(tile0_data_ + task_id * stride, tile1_data_ + task_id * stride, output_data_ + task_id * stride,
111                   count, quant_param_);
112   } else {
113     ret = SubInt8(input0_data_ + task_id * stride, input1_data_ + task_id * stride, output_data_ + task_id * stride,
114                   count, quant_param_);
115   }
116 
117   if (ret != RET_OK) {
118     MS_LOG(ERROR) << "Subint8 function error error_code[" << ret << "]";
119     return RET_ERROR;
120   }
121   return RET_OK;
122 }
123 
SubInt8Run(void * cdata,int task_id,float lhs_scale,float rhs_scale)124 int SubInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
125   auto sub_kernel = reinterpret_cast<SubInt8CPUKernel *>(cdata);
126   auto ret = sub_kernel->DoExecute(task_id);
127   if (ret != RET_OK) {
128     MS_LOG(ERROR) << "SubInt8 DoExecute error task_id[" << task_id << "] error_code[" << ret << "]";
129     return RET_ERROR;
130   }
131   return RET_OK;
132 }
133 
Run()134 int SubInt8CPUKernel::Run() {
135   if (broadcast_) {
136     ArithmeticParameter tile_para;
137     tile_para.ndim_ = out_tensors_.at(0)->shape().size();
138     for (size_t i = 0; i < tile_para.ndim_; i++) {
139       tile_para.in_shape0_[i] = in_tensors_.at(0)->DimensionSize(i);
140       tile_para.in_shape1_[i] = in_tensors_.at(1)->DimensionSize(i);
141       tile_para.out_shape_[i] = out_tensors_.at(0)->DimensionSize(i);
142     }
143     tile0_data_ = static_cast<int8_t *>(ms_context_->allocator->Malloc(out_tensors_.at(0)->Size()));
144     if (tile0_data_ == nullptr) {
145       MS_LOG(ERROR) << "malloc memory fail!";
146       return RET_ERROR;
147     }
148     tile1_data_ = static_cast<int8_t *>(ms_context_->allocator->Malloc(out_tensors_.at(0)->Size()));
149     if (tile1_data_ == nullptr) {
150       MS_LOG(ERROR) << "malloc memory fail!";
151       ms_context_->allocator->Free(tile0_data_);
152       return RET_ERROR;
153     }
154     TileDimensionsInt8(static_cast<int8_t *>(in_tensors_.at(0)->data()),
155                        static_cast<int8_t *>(in_tensors_.at(1)->data()), reinterpret_cast<int8_t *>(tile0_data_),
156                        reinterpret_cast<int8_t *>(tile1_data_), &tile_para);
157   }
158   auto ret = ParallelLaunch(this->ms_context_, SubInt8Run, this, op_parameter_->thread_num_);
159   if (broadcast_) {
160     ms_context_->allocator->Free(tile0_data_);
161     ms_context_->allocator->Free(tile1_data_);
162   }
163   if (ret != RET_OK) {
164     MS_LOG(ERROR) << "SubInt8Run function error error_code[" << ret << "]";
165   }
166   return ret;
167 }
168 
// Kernel registration: ties SubInt8CPUKernel (created through LiteKernelCreator) to the key
// (kCPU, kNumberTypeInt8, PrimitiveType_SubFusion).
// NOTE(review): the exact expansion of REG_KERNEL is not visible in this file — presumably it defines a
// static KernelRegistrar; confirm against src/kernel_registry.h.
169 REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_SubFusion, LiteKernelCreator<SubInt8CPUKernel>)
170 }  // namespace mindspore::kernel
171