1 /**
2 * Copyright 2020 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "src/runtime/kernel/arm/base/convolution_base.h"
18 #include <cfloat>
19 #include "schema/model_generated.h"
20 #include "src/kernel_registry.h"
21
22 using mindspore::lite::KernelRegistrar;
23 using mindspore::lite::RET_ERROR;
24 using mindspore::lite::RET_MEMORY_FAILED;
25 using mindspore::lite::RET_OK;
26 using mindspore::schema::ActivationType;
27
28 namespace mindspore::kernel {
MallocAlignedData(size_t alignment,size_t size)29 void *ConvolutionBaseCPUKernel::MallocAlignedData(size_t alignment, size_t size) {
30 auto ptr = malloc(size + alignment);
31 if (ptr == nullptr) {
32 MS_LOG(ERROR) << "MallocAlignedData failed!";
33 return nullptr;
34 }
35 auto aligned_ptr = (reinterpret_cast<uintptr_t>(ptr) + alignment - 1) & (~(alignment - 1));
36 addr_map[aligned_ptr] = ptr;
37 return reinterpret_cast<void *>(aligned_ptr);
38 }
39
FreeAlignedData(void ** ptr)40 void ConvolutionBaseCPUKernel::FreeAlignedData(void **ptr) {
41 if (*ptr != nullptr && addr_map[reinterpret_cast<uintptr_t>(*ptr)] != nullptr) {
42 free(addr_map[reinterpret_cast<uintptr_t>(*ptr)]);
43 addr_map[reinterpret_cast<uintptr_t>(*ptr)] = nullptr;
44 *ptr = nullptr;
45 }
46 }
47
~ConvolutionBaseCPUKernel()48 ConvolutionBaseCPUKernel::~ConvolutionBaseCPUKernel() {
49 if (addr_map.find(reinterpret_cast<uintptr_t>(packed_weight_)) != addr_map.end()) {
50 FreeAlignedData(reinterpret_cast<void **>(&packed_weight_));
51 } else if (!op_parameter_->is_train_session_) {
52 if (packed_weight_ != nullptr) {
53 free(packed_weight_);
54 packed_weight_ = nullptr;
55 }
56 }
57 if (addr_map.find(reinterpret_cast<uintptr_t>(bias_data_)) != addr_map.end()) {
58 FreeAlignedData(reinterpret_cast<void **>(&bias_data_));
59 } else if (bias_data_ != nullptr) {
60 free(bias_data_);
61 bias_data_ = nullptr;
62 }
63 }
64
FreeQuantParam()65 void ConvolutionBaseCPUKernel::FreeQuantParam() {
66 if (conv_quant_arg_ == nullptr) {
67 return;
68 }
69 if (conv_quant_arg_->real_multiplier_ != nullptr) {
70 free(conv_quant_arg_->real_multiplier_);
71 conv_quant_arg_->real_multiplier_ = nullptr;
72 }
73 if (conv_quant_arg_->left_shift_ != nullptr) {
74 free(conv_quant_arg_->left_shift_);
75 conv_quant_arg_->left_shift_ = nullptr;
76 }
77 if (conv_quant_arg_->right_shift_ != nullptr) {
78 free(conv_quant_arg_->right_shift_);
79 conv_quant_arg_->right_shift_ = nullptr;
80 }
81 if (conv_quant_arg_->quant_multiplier_ != nullptr) {
82 free(conv_quant_arg_->quant_multiplier_);
83 conv_quant_arg_->quant_multiplier_ = nullptr;
84 }
85 if (conv_quant_arg_->out_act_min_ != nullptr) {
86 free(conv_quant_arg_->out_act_min_);
87 conv_quant_arg_->out_act_min_ = nullptr;
88 }
89 if (conv_quant_arg_->out_act_max_ != nullptr) {
90 free(conv_quant_arg_->out_act_max_);
91 conv_quant_arg_->out_act_max_ = nullptr;
92 }
93 if (conv_quant_arg_->input_quant_args_ != nullptr) {
94 free(conv_quant_arg_->input_quant_args_);
95 conv_quant_arg_->input_quant_args_ = nullptr;
96 }
97 if (conv_quant_arg_->filter_quant_args_ != nullptr) {
98 free(conv_quant_arg_->filter_quant_args_);
99 conv_quant_arg_->filter_quant_args_ = nullptr;
100 }
101 if (conv_quant_arg_->output_quant_args_ != nullptr) {
102 free(conv_quant_arg_->output_quant_args_);
103 conv_quant_arg_->output_quant_args_ = nullptr;
104 }
105 }
106
Init()107 int ConvolutionBaseCPUKernel::Init() {
108 auto input = this->in_tensors_.front();
109 auto output = this->out_tensors_.front();
110 CHECK_NULL_RETURN(input);
111 CHECK_NULL_RETURN(output);
112 CHECK_NULL_RETURN(conv_param_);
113 conv_param_->input_batch_ = input->Batch();
114 conv_param_->input_h_ = input->Height();
115 conv_param_->input_w_ = input->Width();
116 conv_param_->input_channel_ = input->Channel();
117 conv_param_->output_batch_ = output->Batch();
118 conv_param_->output_h_ = output->Height();
119 conv_param_->output_w_ = output->Width();
120 conv_param_->output_channel_ = output->Channel();
121 conv_param_->thread_num_ = op_parameter_->thread_num_;
122 return RET_OK;
123 }
124
InitConvWeightBias()125 int ConvolutionBaseCPUKernel::InitConvWeightBias() {
126 if (op_parameter_->is_train_session_) {
127 UpdateOriginWeightAndBias();
128 }
129 auto weight_tensor = in_tensors_.at(kWeightIndex);
130 CHECK_NULL_RETURN(weight_tensor);
131 auto shape = weight_tensor->shape();
132 if (std::find(shape.begin(), shape.end(), -1) != shape.end()) {
133 MS_LOG(WARNING) << "The shape of weight tensor is not ready, the weight and bias would be inited in runtime.";
134 return lite::RET_OK;
135 }
136 if (MallocWeightBiasData() != RET_OK) {
137 MS_LOG(ERROR) << "Malloc data for bias and weight failed.";
138 return lite::RET_ERROR;
139 }
140
141 if (in_tensors_.size() == kInputSize2) {
142 memcpy(bias_data_, origin_bias_, in_tensors_.at(kBiasIndex)->Size());
143 } else {
144 MS_ASSERT(in_tensors_.size() == kInputSize1);
145 }
146 if (!op_parameter_->is_train_session_) {
147 if (origin_weight_ != nullptr) {
148 PackWeight();
149 } else {
150 is_repack_ = true;
151 MS_LOG(WARNING) << "The weight is nullptr, will pack in runtime.";
152 }
153 }
154 return lite::RET_OK;
155 }
156
RepackWeight()157 int ConvolutionBaseCPUKernel::RepackWeight() {
158 origin_weight_ = origin_weight_ != nullptr ? origin_weight_ : in_tensors_.at(kWeightIndex)->MutableData();
159 if (packed_weight_ == nullptr && InitConvWeightBias() != RET_OK) {
160 MS_LOG(ERROR) << "Malloc data for bias and weight failed.";
161 return lite::RET_ERROR;
162 }
163 if (IsRepack() || (op_parameter_->is_train_session_)) {
164 if (op_parameter_->is_train_session_) {
165 packed_weight_ = reinterpret_cast<float *>(workspace());
166 memset(packed_weight_, 0, workspace_size());
167 } else {
168 is_repack_ = false;
169 }
170 PackWeight();
171 }
172 return RET_OK;
173 }
174
CheckResizeValid()175 int ConvolutionBaseCPUKernel::CheckResizeValid() {
176 // ===============check in channel================= //
177 auto filter_tensor = in_tensors_.at(kWeightIndex);
178 CHECK_NULL_RETURN(filter_tensor);
179 auto filter_in_channel = filter_tensor->Channel();
180 int resize_in_channel = in_tensors_.at(kInputIndex)->Channel();
181 if (filter_in_channel != resize_in_channel) {
182 MS_LOG(ERROR) << "Channel of resized input should be equal to in channel of filter.";
183 return RET_ERROR;
184 }
185 return RET_OK;
186 }
187
SetIfPerChannel()188 int ConvolutionBaseCPUKernel::SetIfPerChannel() {
189 auto filter_tensor = in_tensors_.at(kWeightIndex);
190 CHECK_NULL_RETURN(filter_tensor);
191 auto input_channel = filter_tensor->Channel();
192 auto output_channel = filter_tensor->Batch();
193
194 uint8_t per_channel = 0b0;
195 if (conv_quant_arg_->input_arg_num_ != kPerTensor) {
196 if (static_cast<int>(conv_quant_arg_->input_arg_num_) != input_channel) {
197 MS_LOG(ERROR) << "input per channel quant param length is not equal to input channel.";
198 return RET_ERROR;
199 }
200 per_channel = per_channel | INPUT_PER_CHANNEL;
201 }
202
203 if (conv_quant_arg_->filter_arg_num_ != kPerTensor) {
204 if (static_cast<int>(conv_quant_arg_->filter_arg_num_) != output_channel) {
205 MS_LOG(ERROR) << "weight per channel quant param length is not equal to filter num.";
206 return RET_ERROR;
207 }
208 per_channel = per_channel | FILTER_PER_CHANNEL;
209 }
210
211 if (conv_quant_arg_->output_arg_num_ != kPerTensor) {
212 if (static_cast<int>(conv_quant_arg_->output_arg_num_) != output_channel) {
213 MS_LOG(ERROR) << "output per channel quant param length is not equal to output channel.";
214 return RET_ERROR;
215 }
216 per_channel = per_channel | OUTPUT_PER_CHANNEL;
217 }
218 conv_quant_arg_->per_channel_ = per_channel;
219 return RET_OK;
220 }
221
MallocQuantParam()222 int ConvolutionBaseCPUKernel::MallocQuantParam() {
223 conv_quant_arg_ = &conv_param_->conv_quant_arg_;
224 CHECK_NULL_RETURN(conv_quant_arg_);
225 auto input_tensor = in_tensors_.at(kInputIndex);
226 auto weight_tensor = in_tensors_.at(kWeightIndex);
227 auto output_tensor = out_tensors_.at(kOutputIndex);
228 size_t input_arg_num = input_tensor->quant_params().size();
229 size_t filter_arg_num = weight_tensor->quant_params().size();
230 size_t output_arg_num = output_tensor->quant_params().size();
231 conv_quant_arg_->input_arg_num_ = input_arg_num;
232 conv_quant_arg_->filter_arg_num_ = filter_arg_num;
233 conv_quant_arg_->output_arg_num_ = output_arg_num;
234
235 conv_quant_arg_->input_quant_args_ = reinterpret_cast<QuantArg *>(malloc(input_arg_num * sizeof(QuantArg)));
236 if (conv_quant_arg_->input_quant_args_ == nullptr) {
237 MS_LOG(ERROR) << "malloc input_quant_args_ failed.";
238 return RET_MEMORY_FAILED;
239 }
240 conv_quant_arg_->filter_quant_args_ = reinterpret_cast<QuantArg *>(malloc(filter_arg_num * sizeof(QuantArg)));
241 if (conv_quant_arg_->filter_quant_args_ == nullptr) {
242 MS_LOG(ERROR) << "malloc filter_quant_args_ failed.";
243 return RET_MEMORY_FAILED;
244 }
245 conv_quant_arg_->output_quant_args_ = reinterpret_cast<QuantArg *>(malloc(output_arg_num * sizeof(QuantArg)));
246 if (conv_quant_arg_->output_quant_args_ == nullptr) {
247 MS_LOG(ERROR) << "malloc output_quant_args_ failed.";
248 return RET_MEMORY_FAILED;
249 }
250 return RET_OK;
251 }
252
SetInputTensorQuantParam()253 int ConvolutionBaseCPUKernel::SetInputTensorQuantParam() {
254 auto input_tensor = in_tensors_.at(kInputIndex);
255 auto in_arg_num = conv_quant_arg_->input_arg_num_;
256 if (in_arg_num == kPerTensor) {
257 auto input_quant_arg = input_tensor->quant_params().front();
258 conv_quant_arg_->input_quant_args_[0].zp_ = input_quant_arg.zeroPoint;
259 conv_quant_arg_->input_quant_args_[0].scale_ = input_quant_arg.scale;
260 } else {
261 // per channel
262 MS_LOG(ERROR) << "Not Support Per Channel for input now.";
263 return RET_ERROR;
264 }
265 return RET_OK;
266 }
267
SetFilterTensorQuantParam()268 int ConvolutionBaseCPUKernel::SetFilterTensorQuantParam() {
269 auto weight_tensor = in_tensors_.at(kWeightIndex);
270 auto weight_arg_num = conv_quant_arg_->filter_arg_num_;
271 if (weight_arg_num == kPerTensor) {
272 auto weight_quant_arg = weight_tensor->quant_params().front();
273 conv_quant_arg_->filter_quant_args_[0].zp_ = weight_quant_arg.zeroPoint;
274 conv_quant_arg_->filter_quant_args_[0].scale_ = weight_quant_arg.scale;
275 } else {
276 auto weight_quant_arg = weight_tensor->quant_params();
277 for (size_t i = 0; i < weight_arg_num; ++i) {
278 conv_quant_arg_->filter_quant_args_[i].zp_ = weight_quant_arg[i].zeroPoint;
279 conv_quant_arg_->filter_quant_args_[i].scale_ = weight_quant_arg[i].scale;
280 }
281 }
282 return RET_OK;
283 }
284
SetOutputTensorQuantParam()285 int ConvolutionBaseCPUKernel::SetOutputTensorQuantParam() {
286 auto output_tensor = out_tensors_.at(kOutputIndex);
287 auto out_arg_num = conv_quant_arg_->output_arg_num_;
288 if (out_arg_num == kPerTensor) {
289 auto output_quant_arg = output_tensor->quant_params().front();
290 conv_quant_arg_->output_quant_args_[0].zp_ = output_quant_arg.zeroPoint;
291 conv_quant_arg_->output_quant_args_[0].scale_ = output_quant_arg.scale;
292 } else {
293 MS_LOG(ERROR) << "Not Support Per Channel for input now.";
294 return RET_ERROR;
295 }
296 return RET_OK;
297 }
298
SetQuantMultiplier()299 int ConvolutionBaseCPUKernel::SetQuantMultiplier() {
300 // now only support weight tensor is per channel, others are per tensor.
301 int weight_arg_num = kPerTensor;
302 if (conv_quant_arg_->per_channel_ & FILTER_PER_CHANNEL) {
303 weight_arg_num = conv_quant_arg_->filter_arg_num_;
304 }
305 conv_quant_arg_->real_multiplier_ = reinterpret_cast<double *>(malloc(weight_arg_num * sizeof(double)));
306 if (conv_quant_arg_->real_multiplier_ == nullptr) {
307 MS_LOG(ERROR) << "malloc conv_quant_arg_->real_multiplier_ failed.";
308 return RET_MEMORY_FAILED;
309 }
310 conv_quant_arg_->left_shift_ = reinterpret_cast<int32_t *>(malloc(weight_arg_num * sizeof(int32_t)));
311 if (conv_quant_arg_->left_shift_ == nullptr) {
312 MS_LOG(ERROR) << "malloc conv_quant_arg_->left_shift_ failed.";
313 return RET_MEMORY_FAILED;
314 }
315 conv_quant_arg_->right_shift_ = reinterpret_cast<int32_t *>(malloc(weight_arg_num * sizeof(int32_t)));
316 if (conv_quant_arg_->right_shift_ == nullptr) {
317 MS_LOG(ERROR) << "malloc conv_quant_arg_->right_shift_ failed.";
318 return RET_MEMORY_FAILED;
319 }
320 conv_quant_arg_->quant_multiplier_ = reinterpret_cast<int32_t *>(malloc(weight_arg_num * sizeof(int32_t)));
321 if (conv_quant_arg_->quant_multiplier_ == nullptr) {
322 MS_LOG(ERROR) << "malloc conv_quant_arg_->quant_multiplier_ failed.";
323 return RET_MEMORY_FAILED;
324 }
325 conv_quant_arg_->out_act_min_ = reinterpret_cast<int32_t *>(malloc(sizeof(int32_t)));
326 if (conv_quant_arg_->out_act_min_ == nullptr) {
327 MS_LOG(ERROR) << "malloc conv_quant_arg_->out_act_min_ failed.";
328 return RET_MEMORY_FAILED;
329 }
330 conv_quant_arg_->out_act_max_ = reinterpret_cast<int32_t *>(malloc(sizeof(int32_t)));
331 if (conv_quant_arg_->out_act_max_ == nullptr) {
332 MS_LOG(ERROR) << "malloc conv_quant_arg_->out_act_max_ failed.";
333 return RET_MEMORY_FAILED;
334 }
335
336 for (int i = 0; i < weight_arg_num; ++i) {
337 const double in_scale =
338 static_cast<double>(conv_quant_arg_->input_quant_args_[0].scale_ * conv_quant_arg_->filter_quant_args_[i].scale_);
339 double real_multiplier = in_scale / static_cast<double>(conv_quant_arg_->output_quant_args_[0].scale_);
340 conv_quant_arg_->real_multiplier_[i] = real_multiplier;
341 if (conv_quant_arg_->quant_multiplier_mode_ == Method_SinglePrecision) {
342 QuantizeRoundParameterWithSinglePrecision(real_multiplier, &conv_quant_arg_->quant_multiplier_[i],
343 &conv_quant_arg_->left_shift_[i], &conv_quant_arg_->right_shift_[i]);
344 } else if (conv_quant_arg_->quant_multiplier_mode_ == Method_DoublePrecision) {
345 QuantizeRoundParameterWithDoublePrecision(real_multiplier, &conv_quant_arg_->quant_multiplier_[i],
346 &conv_quant_arg_->left_shift_[i], &conv_quant_arg_->right_shift_[i]);
347 }
348 }
349 return RET_OK;
350 }
351
SetRoundingAndMultipilerMode()352 void ConvolutionBaseCPUKernel::SetRoundingAndMultipilerMode() {
353 auto input_quant_arg = in_tensors_.at(kInputIndex)->quant_params().front();
354 int round_type = input_quant_arg.roundType;
355 switch (round_type) {
356 case 1:
357 conv_quant_arg_->round_mode_ = Rounding_Away_from_zero;
358 break;
359 case 2:
360 conv_quant_arg_->round_mode_ = Rounding_Up;
361 break;
362 default:
363 conv_quant_arg_->round_mode_ = Rounding_No;
364 }
365 int cal_multiplier_type = input_quant_arg.multiplier;
366 switch (cal_multiplier_type) {
367 case 0:
368 conv_quant_arg_->quant_multiplier_mode_ = Method_SinglePrecision;
369 break;
370 case 1:
371 conv_quant_arg_->quant_multiplier_mode_ = Method_DoublePrecision;
372 break;
373 default:
374 conv_quant_arg_->quant_multiplier_mode_ = Method_No;
375 }
376 }
377
SetQuantParam()378 int ConvolutionBaseCPUKernel::SetQuantParam() {
379 auto ret = MallocQuantParam();
380 if (ret != RET_OK) {
381 MS_LOG(ERROR) << "Malloc quant param failed.";
382 return ret;
383 }
384 ret = SetInputTensorQuantParam();
385 if (ret != RET_OK) {
386 MS_LOG(ERROR) << "Set Input Tensor Quant Param Failed.";
387 return ret;
388 }
389 ret = SetFilterTensorQuantParam();
390 if (ret != RET_OK) {
391 MS_LOG(ERROR) << "Set Filter Tensor Quant Param Failed.";
392 return ret;
393 }
394 ret = SetOutputTensorQuantParam();
395 if (ret != RET_OK) {
396 MS_LOG(ERROR) << "Set Output Tensor Quant Param Failed.";
397 return ret;
398 }
399 ret = SetIfPerChannel();
400 if (ret != RET_OK) {
401 MS_LOG(ERROR) << "Set if per tensor channel failed.";
402 return ret;
403 }
404 SetRoundingAndMultipilerMode();
405 ret = SetQuantMultiplier();
406 if (ret != RET_OK) {
407 MS_LOG(ERROR) << "Set Quant Multiplier Failed.";
408 return ret;
409 }
410 bool relu = conv_param_->act_type_ == ActType_Relu;
411 bool relu6 = conv_param_->act_type_ == ActType_Relu6;
412 CalculateActivationRangeQuantized(relu, relu6, conv_param_->conv_quant_arg_.output_quant_args_[0].zp_,
413 conv_param_->conv_quant_arg_.output_quant_args_[0].scale_,
414 &conv_param_->conv_quant_arg_.out_act_min_[0],
415 &conv_param_->conv_quant_arg_.out_act_max_[0]);
416 return RET_OK;
417 }
418
UpdateOriginWeightAndBias()419 void ConvolutionBaseCPUKernel::UpdateOriginWeightAndBias() {
420 if (in_tensors_.at(kWeightIndex)->data() != nullptr) {
421 origin_weight_ = in_tensors_.at(kWeightIndex)->data();
422 }
423 if (in_tensors_.size() == kInputSize2 && in_tensors_.at(kBiasIndex)->data() != nullptr) {
424 origin_bias_ = in_tensors_.at(kBiasIndex)->data();
425 }
426 }
427 } // namespace mindspore::kernel
428