/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/runtime/kernel/arm/base/convolution_base.h"
#include <cfloat>
#include "schema/model_generated.h"
#include "src/kernel_registry.h"

using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_MEMORY_FAILED;
using mindspore::lite::RET_OK;
using mindspore::schema::ActivationType;

namespace mindspore::kernel {
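// Allocates a buffer of `size` bytes whose start address is aligned to `alignment` (expected to be a power
// of two). The raw pointer returned by malloc is recorded in addr_map so it can be released later by
// FreeAlignedData.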
void *ConvolutionBaseCPUKernel::MallocAlignedData(size_t alignment, size_t size) {
  auto ptr = malloc(size + alignment);
  if (ptr == nullptr) {
    MS_LOG(ERROR) << "MallocAlignedData failed!";
    return nullptr;
  }
  auto aligned_ptr = (reinterpret_cast<uintptr_t>(ptr) + alignment - 1) & (~(alignment - 1));
  addr_map[aligned_ptr] = ptr;
  return reinterpret_cast<void *>(aligned_ptr);
}

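// Frees a buffer previously obtained from MallocAlignedData: looks up the original malloc pointer via the
// aligned address and resets the caller's pointer to nullptr.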
void ConvolutionBaseCPUKernel::FreeAlignedData(void **ptr) {
  if (*ptr != nullptr && addr_map[reinterpret_cast<uintptr_t>(*ptr)] != nullptr) {
    free(addr_map[reinterpret_cast<uintptr_t>(*ptr)]);
    addr_map[reinterpret_cast<uintptr_t>(*ptr)] = nullptr;
    *ptr = nullptr;
  }
}

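// Releases the packed weight and bias buffers. Aligned buffers go through FreeAlignedData; plain buffers
// are freed directly, except that the packed weight is left alone in a train session (RepackWeight points
// it at the kernel workspace in that case).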
ConvolutionBaseCPUKernel::~ConvolutionBaseCPUKernel() {
  if (addr_map.find(reinterpret_cast<uintptr_t>(packed_weight_)) != addr_map.end()) {
    FreeAlignedData(reinterpret_cast<void **>(&packed_weight_));
  } else if (!op_parameter_->is_train_session_) {
    if (packed_weight_ != nullptr) {
      free(packed_weight_);
      packed_weight_ = nullptr;
    }
  }
  if (addr_map.find(reinterpret_cast<uintptr_t>(bias_data_)) != addr_map.end()) {
    FreeAlignedData(reinterpret_cast<void **>(&bias_data_));
  } else if (bias_data_ != nullptr) {
    free(bias_data_);
    bias_data_ = nullptr;
  }
}

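// Frees every buffer hanging off conv_quant_arg_ and resets the pointers, so the quant parameters can be
// released safely even if only some of them were allocated.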
void ConvolutionBaseCPUKernel::FreeQuantParam() {
  if (conv_quant_arg_ == nullptr) {
    return;
  }
  if (conv_quant_arg_->real_multiplier_ != nullptr) {
    free(conv_quant_arg_->real_multiplier_);
    conv_quant_arg_->real_multiplier_ = nullptr;
  }
  if (conv_quant_arg_->left_shift_ != nullptr) {
    free(conv_quant_arg_->left_shift_);
    conv_quant_arg_->left_shift_ = nullptr;
  }
  if (conv_quant_arg_->right_shift_ != nullptr) {
    free(conv_quant_arg_->right_shift_);
    conv_quant_arg_->right_shift_ = nullptr;
  }
  if (conv_quant_arg_->quant_multiplier_ != nullptr) {
    free(conv_quant_arg_->quant_multiplier_);
    conv_quant_arg_->quant_multiplier_ = nullptr;
  }
  if (conv_quant_arg_->out_act_min_ != nullptr) {
    free(conv_quant_arg_->out_act_min_);
    conv_quant_arg_->out_act_min_ = nullptr;
  }
  if (conv_quant_arg_->out_act_max_ != nullptr) {
    free(conv_quant_arg_->out_act_max_);
    conv_quant_arg_->out_act_max_ = nullptr;
  }
  if (conv_quant_arg_->input_quant_args_ != nullptr) {
    free(conv_quant_arg_->input_quant_args_);
    conv_quant_arg_->input_quant_args_ = nullptr;
  }
  if (conv_quant_arg_->filter_quant_args_ != nullptr) {
    free(conv_quant_arg_->filter_quant_args_);
    conv_quant_arg_->filter_quant_args_ = nullptr;
  }
  if (conv_quant_arg_->output_quant_args_ != nullptr) {
    free(conv_quant_arg_->output_quant_args_);
    conv_quant_arg_->output_quant_args_ = nullptr;
  }
}

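// Caches the input/output tensor shapes and the thread count into conv_param_ so that derived kernels can
// read them without touching the tensors again.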
int ConvolutionBaseCPUKernel::Init() {
  auto input = this->in_tensors_.front();
  auto output = this->out_tensors_.front();
  CHECK_NULL_RETURN(input);
  CHECK_NULL_RETURN(output);
  CHECK_NULL_RETURN(conv_param_);
  conv_param_->input_batch_ = input->Batch();
  conv_param_->input_h_ = input->Height();
  conv_param_->input_w_ = input->Width();
  conv_param_->input_channel_ = input->Channel();
  conv_param_->output_batch_ = output->Batch();
  conv_param_->output_h_ = output->Height();
  conv_param_->output_w_ = output->Width();
  conv_param_->output_channel_ = output->Channel();
  conv_param_->thread_num_ = op_parameter_->thread_num_;
  return RET_OK;
}

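// Allocates the packed weight and bias buffers and fills them from the origin tensors. If the weight shape
// is still unknown (contains -1), packing is deferred to runtime.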
int ConvolutionBaseCPUKernel::InitConvWeightBias() {
  if (op_parameter_->is_train_session_) {
    UpdateOriginWeightAndBias();
  }
  auto weight_tensor = in_tensors_.at(kWeightIndex);
  CHECK_NULL_RETURN(weight_tensor);
  auto shape = weight_tensor->shape();
  if (std::find(shape.begin(), shape.end(), -1) != shape.end()) {
    MS_LOG(WARNING) << "The shape of the weight tensor is not ready, the weight and bias will be initialized at runtime.";
    return lite::RET_OK;
  }
  if (MallocWeightBiasData() != RET_OK) {
    MS_LOG(ERROR) << "Malloc data for bias and weight failed.";
    return lite::RET_ERROR;
  }

  if (in_tensors_.size() == kInputSize2) {
    memcpy(bias_data_, origin_bias_, in_tensors_.at(kBiasIndex)->Size());
  } else {
    MS_ASSERT(in_tensors_.size() == kInputSize1);
  }
  if (!op_parameter_->is_train_session_) {
    if (origin_weight_ != nullptr) {
      PackWeight();
    } else {
      is_repack_ = true;
      MS_LOG(WARNING) << "The weight is nullptr, it will be packed at runtime.";
    }
  }
  return lite::RET_OK;
}

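// Re-packs the weight at run time. This covers the deferred-packing case above as well as train sessions,
// where the packed weight lives in the kernel workspace and is rebuilt on every call.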
int ConvolutionBaseCPUKernel::RepackWeight() {
  origin_weight_ = origin_weight_ != nullptr ? origin_weight_ : in_tensors_.at(kWeightIndex)->MutableData();
  if (packed_weight_ == nullptr && InitConvWeightBias() != RET_OK) {
    MS_LOG(ERROR) << "Malloc data for bias and weight failed.";
    return lite::RET_ERROR;
  }
  if (IsRepack() || (op_parameter_->is_train_session_)) {
    if (op_parameter_->is_train_session_) {
      packed_weight_ = reinterpret_cast<float *>(workspace());
      memset(packed_weight_, 0, workspace_size());
    } else {
      is_repack_ = false;
    }
    PackWeight();
  }
  return RET_OK;
}

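// After a resize, the input channel count may change; the filter's in-channel dimension is fixed, so the
// two must still match.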
int ConvolutionBaseCPUKernel::CheckResizeValid() {
  // ===============check in channel================= //
  auto filter_tensor = in_tensors_.at(kWeightIndex);
  CHECK_NULL_RETURN(filter_tensor);
  auto filter_in_channel = filter_tensor->Channel();
  int resize_in_channel = in_tensors_.at(kInputIndex)->Channel();
  if (filter_in_channel != resize_in_channel) {
    MS_LOG(ERROR) << "Channel of resized input should be equal to in channel of filter.";
    return RET_ERROR;
  }
  return RET_OK;
}

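// Decides, for input/filter/output, whether the quant parameters are per-tensor or per-channel and records
// the result as bit flags in conv_quant_arg_->per_channel_. Per-channel parameter counts must match the
// corresponding channel dimension.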
int ConvolutionBaseCPUKernel::SetIfPerChannel() {
  auto filter_tensor = in_tensors_.at(kWeightIndex);
  CHECK_NULL_RETURN(filter_tensor);
  auto input_channel = filter_tensor->Channel();
  auto output_channel = filter_tensor->Batch();

  uint8_t per_channel = 0b0;
  if (conv_quant_arg_->input_arg_num_ != kPerTensor) {
    if (static_cast<int>(conv_quant_arg_->input_arg_num_) != input_channel) {
      MS_LOG(ERROR) << "input per channel quant param length is not equal to input channel.";
      return RET_ERROR;
    }
    per_channel = per_channel | INPUT_PER_CHANNEL;
  }

  if (conv_quant_arg_->filter_arg_num_ != kPerTensor) {
    if (static_cast<int>(conv_quant_arg_->filter_arg_num_) != output_channel) {
      MS_LOG(ERROR) << "weight per channel quant param length is not equal to filter num.";
      return RET_ERROR;
    }
    per_channel = per_channel | FILTER_PER_CHANNEL;
  }

  if (conv_quant_arg_->output_arg_num_ != kPerTensor) {
    if (static_cast<int>(conv_quant_arg_->output_arg_num_) != output_channel) {
      MS_LOG(ERROR) << "output per channel quant param length is not equal to output channel.";
      return RET_ERROR;
    }
    per_channel = per_channel | OUTPUT_PER_CHANNEL;
  }
  conv_quant_arg_->per_channel_ = per_channel;
  return RET_OK;
}

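// Allocates the input/filter/output QuantArg arrays in conv_quant_arg_, sized by the number of quant
// parameters attached to each tensor.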
int ConvolutionBaseCPUKernel::MallocQuantParam() {
  conv_quant_arg_ = &conv_param_->conv_quant_arg_;
  CHECK_NULL_RETURN(conv_quant_arg_);
  auto input_tensor = in_tensors_.at(kInputIndex);
  auto weight_tensor = in_tensors_.at(kWeightIndex);
  auto output_tensor = out_tensors_.at(kOutputIndex);
  size_t input_arg_num = input_tensor->quant_params().size();
  size_t filter_arg_num = weight_tensor->quant_params().size();
  size_t output_arg_num = output_tensor->quant_params().size();
  conv_quant_arg_->input_arg_num_ = input_arg_num;
  conv_quant_arg_->filter_arg_num_ = filter_arg_num;
  conv_quant_arg_->output_arg_num_ = output_arg_num;

  conv_quant_arg_->input_quant_args_ = reinterpret_cast<QuantArg *>(malloc(input_arg_num * sizeof(QuantArg)));
  if (conv_quant_arg_->input_quant_args_ == nullptr) {
    MS_LOG(ERROR) << "malloc input_quant_args_ failed.";
    return RET_MEMORY_FAILED;
  }
  conv_quant_arg_->filter_quant_args_ = reinterpret_cast<QuantArg *>(malloc(filter_arg_num * sizeof(QuantArg)));
  if (conv_quant_arg_->filter_quant_args_ == nullptr) {
    MS_LOG(ERROR) << "malloc filter_quant_args_ failed.";
    return RET_MEMORY_FAILED;
  }
  conv_quant_arg_->output_quant_args_ = reinterpret_cast<QuantArg *>(malloc(output_arg_num * sizeof(QuantArg)));
  if (conv_quant_arg_->output_quant_args_ == nullptr) {
    MS_LOG(ERROR) << "malloc output_quant_args_ failed.";
    return RET_MEMORY_FAILED;
  }
  return RET_OK;
}

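// Copies the input tensor's zero point and scale into conv_quant_arg_. Only per-tensor quantization is
// supported for the input.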
int ConvolutionBaseCPUKernel::SetInputTensorQuantParam() {
  auto input_tensor = in_tensors_.at(kInputIndex);
  auto in_arg_num = conv_quant_arg_->input_arg_num_;
  if (in_arg_num == kPerTensor) {
    auto input_quant_arg = input_tensor->quant_params().front();
    conv_quant_arg_->input_quant_args_[0].zp_ = input_quant_arg.zeroPoint;
    conv_quant_arg_->input_quant_args_[0].scale_ = input_quant_arg.scale;
  } else {
    // per channel
    MS_LOG(ERROR) << "Per channel quantization is not supported for input now.";
    return RET_ERROR;
  }
  return RET_OK;
}

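// Copies the weight tensor's zero points and scales into conv_quant_arg_, handling both per-tensor and
// per-channel quantization.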
int ConvolutionBaseCPUKernel::SetFilterTensorQuantParam() {
  auto weight_tensor = in_tensors_.at(kWeightIndex);
  auto weight_arg_num = conv_quant_arg_->filter_arg_num_;
  if (weight_arg_num == kPerTensor) {
    auto weight_quant_arg = weight_tensor->quant_params().front();
    conv_quant_arg_->filter_quant_args_[0].zp_ = weight_quant_arg.zeroPoint;
    conv_quant_arg_->filter_quant_args_[0].scale_ = weight_quant_arg.scale;
  } else {
    auto weight_quant_arg = weight_tensor->quant_params();
    for (size_t i = 0; i < weight_arg_num; ++i) {
      conv_quant_arg_->filter_quant_args_[i].zp_ = weight_quant_arg[i].zeroPoint;
      conv_quant_arg_->filter_quant_args_[i].scale_ = weight_quant_arg[i].scale;
    }
  }
  return RET_OK;
}

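// Copies the output tensor's zero point and scale into conv_quant_arg_. Only per-tensor quantization is
// supported for the output.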
int ConvolutionBaseCPUKernel::SetOutputTensorQuantParam() {
  auto output_tensor = out_tensors_.at(kOutputIndex);
  auto out_arg_num = conv_quant_arg_->output_arg_num_;
  if (out_arg_num == kPerTensor) {
    auto output_quant_arg = output_tensor->quant_params().front();
    conv_quant_arg_->output_quant_args_[0].zp_ = output_quant_arg.zeroPoint;
    conv_quant_arg_->output_quant_args_[0].scale_ = output_quant_arg.scale;
  } else {
    MS_LOG(ERROR) << "Per channel quantization is not supported for output now.";
    return RET_ERROR;
  }
  return RET_OK;
}

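// Computes the requantization parameters: for each filter channel, real_multiplier = input_scale *
// filter_scale / output_scale, which is then converted into a fixed-point multiplier plus left/right shifts
// according to the selected precision mode.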
int ConvolutionBaseCPUKernel::SetQuantMultiplier() {
  // now only support weight tensor is per channel, others are per tensor.
  int weight_arg_num = kPerTensor;
  if (conv_quant_arg_->per_channel_ & FILTER_PER_CHANNEL) {
    weight_arg_num = conv_quant_arg_->filter_arg_num_;
  }
  conv_quant_arg_->real_multiplier_ = reinterpret_cast<double *>(malloc(weight_arg_num * sizeof(double)));
  if (conv_quant_arg_->real_multiplier_ == nullptr) {
    MS_LOG(ERROR) << "malloc conv_quant_arg_->real_multiplier_ failed.";
    return RET_MEMORY_FAILED;
  }
  conv_quant_arg_->left_shift_ = reinterpret_cast<int32_t *>(malloc(weight_arg_num * sizeof(int32_t)));
  if (conv_quant_arg_->left_shift_ == nullptr) {
    MS_LOG(ERROR) << "malloc conv_quant_arg_->left_shift_ failed.";
    return RET_MEMORY_FAILED;
  }
  conv_quant_arg_->right_shift_ = reinterpret_cast<int32_t *>(malloc(weight_arg_num * sizeof(int32_t)));
  if (conv_quant_arg_->right_shift_ == nullptr) {
    MS_LOG(ERROR) << "malloc conv_quant_arg_->right_shift_ failed.";
    return RET_MEMORY_FAILED;
  }
  conv_quant_arg_->quant_multiplier_ = reinterpret_cast<int32_t *>(malloc(weight_arg_num * sizeof(int32_t)));
  if (conv_quant_arg_->quant_multiplier_ == nullptr) {
    MS_LOG(ERROR) << "malloc conv_quant_arg_->quant_multiplier_ failed.";
    return RET_MEMORY_FAILED;
  }
  conv_quant_arg_->out_act_min_ = reinterpret_cast<int32_t *>(malloc(sizeof(int32_t)));
  if (conv_quant_arg_->out_act_min_ == nullptr) {
    MS_LOG(ERROR) << "malloc conv_quant_arg_->out_act_min_ failed.";
    return RET_MEMORY_FAILED;
  }
  conv_quant_arg_->out_act_max_ = reinterpret_cast<int32_t *>(malloc(sizeof(int32_t)));
  if (conv_quant_arg_->out_act_max_ == nullptr) {
    MS_LOG(ERROR) << "malloc conv_quant_arg_->out_act_max_ failed.";
    return RET_MEMORY_FAILED;
  }

  for (int i = 0; i < weight_arg_num; ++i) {
    const double in_scale =
      static_cast<double>(conv_quant_arg_->input_quant_args_[0].scale_ * conv_quant_arg_->filter_quant_args_[i].scale_);
    double real_multiplier = in_scale / static_cast<double>(conv_quant_arg_->output_quant_args_[0].scale_);
    conv_quant_arg_->real_multiplier_[i] = real_multiplier;
    if (conv_quant_arg_->quant_multiplier_mode_ == Method_SinglePrecision) {
      QuantizeRoundParameterWithSinglePrecision(real_multiplier, &conv_quant_arg_->quant_multiplier_[i],
                                                &conv_quant_arg_->left_shift_[i], &conv_quant_arg_->right_shift_[i]);
    } else if (conv_quant_arg_->quant_multiplier_mode_ == Method_DoublePrecision) {
      QuantizeRoundParameterWithDoublePrecision(real_multiplier, &conv_quant_arg_->quant_multiplier_[i],
                                                &conv_quant_arg_->left_shift_[i], &conv_quant_arg_->right_shift_[i]);
    }
  }
  return RET_OK;
}

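// Maps the rounding type and multiplier precision carried by the input tensor's quant parameters to the
// kernel's rounding and multiplier modes.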
void ConvolutionBaseCPUKernel::SetRoundingAndMultipilerMode() {
  auto input_quant_arg = in_tensors_.at(kInputIndex)->quant_params().front();
  int round_type = input_quant_arg.roundType;
  switch (round_type) {
    case 1:
      conv_quant_arg_->round_mode_ = Rounding_Away_from_zero;
      break;
    case 2:
      conv_quant_arg_->round_mode_ = Rounding_Up;
      break;
    default:
      conv_quant_arg_->round_mode_ = Rounding_No;
  }
  int cal_multiplier_type = input_quant_arg.multiplier;
  switch (cal_multiplier_type) {
    case 0:
      conv_quant_arg_->quant_multiplier_mode_ = Method_SinglePrecision;
      break;
    case 1:
      conv_quant_arg_->quant_multiplier_mode_ = Method_DoublePrecision;
      break;
    default:
      conv_quant_arg_->quant_multiplier_mode_ = Method_No;
  }
}

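// Top-level setup of the quantization parameters: allocates storage, copies the per-tensor/per-channel
// quant args, derives the requantization multipliers, and finally clamps the output activation range for
// ReLU/ReLU6.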
int ConvolutionBaseCPUKernel::SetQuantParam() {
  auto ret = MallocQuantParam();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Malloc quant param failed.";
    return ret;
  }
  ret = SetInputTensorQuantParam();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Set Input Tensor Quant Param Failed.";
    return ret;
  }
  ret = SetFilterTensorQuantParam();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Set Filter Tensor Quant Param Failed.";
    return ret;
  }
  ret = SetOutputTensorQuantParam();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Set Output Tensor Quant Param Failed.";
    return ret;
  }
  ret = SetIfPerChannel();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Set per channel flag failed.";
    return ret;
  }
  SetRoundingAndMultipilerMode();
  ret = SetQuantMultiplier();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Set Quant Multiplier Failed.";
    return ret;
  }
  bool relu = conv_param_->act_type_ == ActType_Relu;
  bool relu6 = conv_param_->act_type_ == ActType_Relu6;
  CalculateActivationRangeQuantized(relu, relu6, conv_param_->conv_quant_arg_.output_quant_args_[0].zp_,
                                    conv_param_->conv_quant_arg_.output_quant_args_[0].scale_,
                                    &conv_param_->conv_quant_arg_.out_act_min_[0],
                                    &conv_param_->conv_quant_arg_.out_act_max_[0]);
  return RET_OK;
}

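// Refreshes the cached origin weight and bias pointers from the input tensors when their data is available
// (called for train sessions in InitConvWeightBias).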
void ConvolutionBaseCPUKernel::UpdateOriginWeightAndBias() {
  if (in_tensors_.at(kWeightIndex)->data() != nullptr) {
    origin_weight_ = in_tensors_.at(kWeightIndex)->data();
  }
  if (in_tensors_.size() == kInputSize2 && in_tensors_.at(kBiasIndex)->data() != nullptr) {
    origin_bias_ = in_tensors_.at(kBiasIndex)->data();
  }
}
}  // namespace mindspore::kernel