/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/runtime/kernel/arm/int8/convolution_depthwise_3x3_int8.h"
#include "include/errorcode.h"
#include "nnacl/int8/conv_depthwise_int8.h"

using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;

namespace mindspore::kernel {
namespace {
constexpr int kConvDepthwise3x3BufferSize = 64 * 10 * 10;
constexpr int kChannelUnit = 8;
}  // namespace
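// Releases the sliding-window descriptor and the repacked int16 weight buffer;
// quantization parameters are released through the base class FreeQuantParam().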
ConvolutionDepthwise3x3Int8CPUKernel::~ConvolutionDepthwise3x3Int8CPUKernel() {
  if (sliding_ != nullptr) {
    delete sliding_;
    sliding_ = nullptr;
  }
  if (packed_weight_ != nullptr) {
    free(packed_weight_);
    packed_weight_ = nullptr;
  }
  FreeQuantParam();
}

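// Repacks the int8 weights from NCHW to NHWC and widens them to int16 with the filter
// zero point already subtracted, so the inner kernel accumulates plain products.
// The channel count must be a multiple of kChannelUnit (8).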
int ConvolutionDepthwise3x3Int8CPUKernel::InitWeightBias() {
  CHECK_NULL_RETURN(conv_param_);
  // init weight, int8 -> int16
  auto weight_tensor = in_tensors_.at(kWeightIndex);
  CHECK_NULL_RETURN(weight_tensor);
  auto origin_weight = reinterpret_cast<int8_t *>(weight_tensor->MutableData());
  CHECK_NULL_RETURN(origin_weight);
  int channel = weight_tensor->Batch();
  if (channel < 0) {
    MS_LOG(ERROR) << "get batch from weight_tensor failed.";
    return RET_ERROR;
  }
  if (channel % kChannelUnit != 0) {
    MS_LOG(ERROR) << "ConvolutionDepthwise3x3Int8CPUKernel doesn't support channel " << channel;
    return RET_ERROR;
  }
  int pack_weight_size = channel * weight_tensor->Height() * weight_tensor->Width();
  auto tmp_weight = reinterpret_cast<int8_t *>(malloc(pack_weight_size * sizeof(int8_t)));
  if (tmp_weight == nullptr) {
    MS_LOG(ERROR) << "Malloc buffer failed.";
    return RET_ERROR;
  }
  PackNCHWToNHWCInt8(origin_weight, tmp_weight, 1, weight_tensor->Height() * weight_tensor->Width(),
                     weight_tensor->Batch());

  packed_weight_ = reinterpret_cast<int16_t *>(malloc(static_cast<size_t>(pack_weight_size) * sizeof(int16_t)));
  if (packed_weight_ == nullptr) {
    MS_LOG(ERROR) << "Malloc buffer failed.";
    free(tmp_weight);
    return RET_ERROR;
  }
  bool filter_per_channel = static_cast<bool>(conv_param_->conv_quant_arg_.per_channel_ & FILTER_PER_CHANNEL);
  if (filter_per_channel) {
    for (int i = 0; i < weight_tensor->Height() * weight_tensor->Width(); i++) {
      for (int c = 0; c < channel; c++) {
        int per_channel_weight_zp = conv_param_->conv_quant_arg_.filter_quant_args_[c].zp_;
        packed_weight_[i * channel + c] = (int16_t)(tmp_weight[i * channel + c] - per_channel_weight_zp);
      }
    }
  } else {
    int weight_zp = conv_param_->conv_quant_arg_.filter_quant_args_[0].zp_;
    if (weight_tensor->ElementsNum() > pack_weight_size) {
      MS_LOG(ERROR) << "weight_tensor->ElementsNum() is larger than pack_weight_size.";
      free(tmp_weight);
      return RET_ERROR;
    }
    for (int i = 0; i < weight_tensor->ElementsNum(); i++) {
      packed_weight_[i] = (int16_t)(tmp_weight[i] - weight_zp);
    }
  }
  free(tmp_weight);

  bias_data_ = reinterpret_cast<int32_t *>(malloc(static_cast<size_t>(channel) * sizeof(int32_t)));
  if (bias_data_ == nullptr) {
    MS_LOG(ERROR) << "Malloc buffer failed.";
    return RET_ERROR;
  }
  memset(bias_data_, 0, static_cast<size_t>(channel) * sizeof(int32_t));
  if (in_tensors_.size() == kInputSize2) {
    auto bias_tensor = in_tensors_.at(kBiasIndex);
    CHECK_NULL_RETURN(bias_tensor);
    auto ori_bias = reinterpret_cast<int32_t *>(bias_tensor->MutableData());
    CHECK_NULL_RETURN(ori_bias);
    memcpy(bias_data_, ori_bias, static_cast<size_t>(bias_tensor->ElementsNum()) * sizeof(int32_t));
  }
  return RET_OK;
}

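// Init(): allocates the sliding-window descriptor, sets the quantization parameters,
// and repacks weights/bias; ReSize() is deferred until shape inference has completed.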
int ConvolutionDepthwise3x3Int8CPUKernel::Init() {
  CHECK_LESS_RETURN(in_tensors_.size(), 2);
  CHECK_LESS_RETURN(out_tensors_.size(), 1);
  sliding_ = new (std::nothrow) SlidingWindowParam;
  if (sliding_ == nullptr) {
    MS_LOG(ERROR) << "new SlidingWindowParam failed.";
    return RET_ERROR;
  }
  auto ret = ConvolutionBaseCPUKernel::SetQuantParam();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Set quant param failed.";
    return ret;
  }
  ret = InitWeightBias();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Depthwise int8 InitWeightBias error!";
    return ret;
  }
  if (!InferShapeDone()) {
    return RET_OK;
  }
  return ReSize();
}

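// ReSize(): recomputes the sliding-window partition for the current shapes and caps
// the thread count at the number of output rows, since work is split row-wise.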
int ConvolutionDepthwise3x3Int8CPUKernel::ReSize() {
  auto ret = ConvolutionBaseCPUKernel::Init();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "ConvolutionBaseCPUKernel Init failed.";
    return ret;
  }
  InitSlidingParamConvDw(sliding_, conv_param_, conv_param_->input_channel_);
  conv_param_->thread_num_ = MSMIN(thread_count_, conv_param_->output_h_);
  return RET_OK;
}

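// Each task computes its share of the output using a private slice of the shared
// scratch buffer, offset by task_id, so threads never touch the same scratch memory.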
int ConvolutionDepthwise3x3Int8CPUKernel::DoExecute(int task_id) {
  auto buffer = buffer_ + kConvDepthwise3x3BufferSize * task_id;
  ConvDw3x3Int8(output_ptr_, buffer, input_ptr_, packed_weight_, reinterpret_cast<int32_t *>(bias_data_), conv_param_,
                sliding_, task_id);
  return RET_OK;
}

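// ParallelLaunch trampoline: forwards each task to the kernel's DoExecute.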
int ConvDw3x3Int8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
  auto conv_dw_int8 = reinterpret_cast<ConvolutionDepthwise3x3Int8CPUKernel *>(cdata);
  auto ret = conv_dw_int8->DoExecute(task_id);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "ConvolutionDepthwise3x3Int8Run error task_id[" << task_id << "] error_code[" << ret << "]";
    return RET_ERROR;
  }
  return RET_OK;
}

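// Allocates one kConvDepthwise3x3BufferSize scratch slice per thread from the context
// allocator; the buffer is freed again at the end of Run().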
int ConvolutionDepthwise3x3Int8CPUKernel::InitBuffer() {
  int buffer_size = kConvDepthwise3x3BufferSize * conv_param_->thread_num_;
  buffer_ =
    reinterpret_cast<int8_t *>(ms_context_->allocator->Malloc(static_cast<size_t>(buffer_size) * sizeof(int8_t)));
  if (buffer_ == nullptr) {
    MS_LOG(ERROR) << "Malloc buffer failed.";
    return RET_ERROR;
  }
  return RET_OK;
}

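// Run(): grabs the scratch buffer and the input/output pointers, computes the padded
// border region single-threaded if the sliding window leaves one, then launches the
// interior rows in parallel.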
int ConvolutionDepthwise3x3Int8CPUKernel::Run() {
  auto ret = InitBuffer();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Depthwise int8 InitBuffer error!";
    return ret;
  }

  auto input_tensor = in_tensors_.at(kInputIndex);
  CHECK_NULL_RETURN(input_tensor);
  input_ptr_ = reinterpret_cast<int8_t *>(input_tensor->MutableData());
  CHECK_NULL_RETURN(input_ptr_);

  auto output_tensor = out_tensors_.at(kOutputIndex);
  CHECK_NULL_RETURN(output_tensor);
  output_ptr_ = reinterpret_cast<int8_t *>(output_tensor->MutableData());
  CHECK_NULL_RETURN(output_ptr_);

  if (sliding_->top_ > 0 || sliding_->bottom_ < conv_param_->output_h_ || sliding_->left_ > 0 ||
      sliding_->right_ < conv_param_->output_w_) {
    ConvDw3x3Int8Pad(output_ptr_, input_ptr_, packed_weight_, reinterpret_cast<int32_t *>(bias_data_), conv_param_,
                     sliding_);
  }
  ret = ParallelLaunch(this->ms_context_, ConvDw3x3Int8Run, this, conv_param_->thread_num_);
  if (ret != RET_OK) {
    ms_context_->allocator->Free(buffer_);
    MS_LOG(ERROR) << "ConvDw3x3Int8Run error: error_code[" << ret << "]";
    return RET_ERROR;
  }
  ms_context_->allocator->Free(buffer_);
  return RET_OK;
}
}  // namespace mindspore::kernel