• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/runtime/kernel/arm/int8/convolution_depthwise_3x3_int8.h"
#include "include/errorcode.h"
#include "nnacl/int8/conv_depthwise_int8.h"

using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;

namespace mindspore::kernel {
namespace {
constexpr int kConvDepthwise3x3BufferSize = 64 * 10 * 10;
constexpr int kChannelUnit = 8;
}  // namespace
~ConvolutionDepthwise3x3Int8CPUKernel()29 ConvolutionDepthwise3x3Int8CPUKernel::~ConvolutionDepthwise3x3Int8CPUKernel() {
30   if (sliding_ != nullptr) {
31     delete sliding_;
32     sliding_ = nullptr;
33   }
34   if (packed_weight_ != nullptr) {
35     free(packed_weight_);
36     packed_weight_ = nullptr;
37   }
38   FreeQuantParam();
39 }
InitWeightBias()41 int ConvolutionDepthwise3x3Int8CPUKernel::InitWeightBias() {
42   CHECK_NULL_RETURN(conv_param_);
43   // init weight, int8 -> int16
44   auto weight_tensor = in_tensors_.at(kWeightIndex);
45   CHECK_NULL_RETURN(weight_tensor);
46   auto origin_weight = reinterpret_cast<int8_t *>(weight_tensor->MutableData());
47   CHECK_NULL_RETURN(origin_weight);
48   int channel = weight_tensor->Batch();
49   if (channel < 0) {
50     MS_LOG(ERROR) << "get bach from weight_tensor failed.";
51     return RET_ERROR;
52   }
53   if (channel % kChannelUnit != 0) {
54     MS_LOG(ERROR) << "ConvolutionDepthwise3x3Int8CPUKernel doesn't support channel " << channel;
55     return RET_ERROR;
56   }
57   int pack_weight_size = channel * weight_tensor->Height() * weight_tensor->Width();
58   auto tmp_weight = reinterpret_cast<int8_t *>(malloc(pack_weight_size * sizeof(int8_t)));
59   if (tmp_weight == nullptr) {
60     MS_LOG(ERROR) << "Malloc buffer failed.";
61     return RET_ERROR;
62   }
63   PackNCHWToNHWCInt8(origin_weight, tmp_weight, 1, weight_tensor->Height() * weight_tensor->Width(),
64                      weight_tensor->Batch());
65 
66   packed_weight_ = reinterpret_cast<int16_t *>(malloc(static_cast<size_t>(pack_weight_size) * sizeof(int16_t)));
67   if (packed_weight_ == nullptr) {
68     MS_LOG(ERROR) << "Malloc buffer failed.";
69     free(tmp_weight);
70     return RET_ERROR;
71   }
72   bool filter_per_channel = static_cast<bool>(conv_param_->conv_quant_arg_.per_channel_ & FILTER_PER_CHANNEL);
73   if (filter_per_channel) {
74     for (int i = 0; i < weight_tensor->Height() * weight_tensor->Width(); i++) {
75       for (int c = 0; c < channel; c++) {
76         int per_channel_weight_zp = conv_param_->conv_quant_arg_.filter_quant_args_[c].zp_;
77         packed_weight_[i * channel + c] = (int16_t)(tmp_weight[i * channel + c] - per_channel_weight_zp);
78       }
79     }
80   } else {
81     int weight_zp = conv_param_->conv_quant_arg_.filter_quant_args_[0].zp_;
82     if (weight_tensor->ElementsNum() > pack_weight_size) {
83       MS_LOG(ERROR) << "weight_tensor->ElementsNum() is larger than pack_weight_size.";
84       free(tmp_weight);
85       return RET_ERROR;
86     }
87     for (int i = 0; i < weight_tensor->ElementsNum(); i++) {
88       packed_weight_[i] = (int16_t)(tmp_weight[i] - weight_zp);
89     }
90   }
91   free(tmp_weight);
92 
93   bias_data_ = reinterpret_cast<int32_t *>(malloc(static_cast<size_t>(channel) * sizeof(int32_t)));
94   if (bias_data_ == nullptr) {
95     MS_LOG(ERROR) << "Malloc buffer failed.";
96     return RET_ERROR;
97   }
98   memset(bias_data_, 0, static_cast<size_t>(channel) * sizeof(int32_t));
99   if (in_tensors_.size() == kInputSize2) {
100     auto bias_tensor = in_tensors_.at(kBiasIndex);
101     CHECK_NULL_RETURN(bias_tensor);
102     auto ori_bias = reinterpret_cast<int32_t *>(bias_tensor->MutableData());
103     CHECK_NULL_RETURN(ori_bias);
104     memcpy(bias_data_, ori_bias, static_cast<size_t>(bias_tensor->ElementsNum()) * sizeof(int32_t));
105   }
106   return RET_OK;
107 }
Init()109 int ConvolutionDepthwise3x3Int8CPUKernel::Init() {
110   CHECK_LESS_RETURN(in_tensors_.size(), 2);
111   CHECK_LESS_RETURN(out_tensors_.size(), 1);
112   sliding_ = new (std::nothrow) SlidingWindowParam;
113   if (sliding_ == nullptr) {
114     MS_LOG(ERROR) << "new sliding window param.";
115     return RET_ERROR;
116   }
117   auto ret = ConvolutionBaseCPUKernel::SetQuantParam();
118   if (ret != RET_OK) {
119     MS_LOG(ERROR) << "Set quant param failed.";
120     return ret;
121   }
122   ret = InitWeightBias();
123   if (ret != RET_OK) {
124     MS_LOG(ERROR) << "Depthwise int8 InitWeightBias error!";
125     return ret;
126   }
127   if (!InferShapeDone()) {
128     return RET_OK;
129   }
130   return ReSize();
131 }
ReSize()133 int ConvolutionDepthwise3x3Int8CPUKernel::ReSize() {
134   auto ret = ConvolutionBaseCPUKernel::Init();
135   if (ret != RET_OK) {
136     MS_LOG(ERROR) << "ConvolutionBaseCPUKernel Init failed.";
137     return ret;
138   }
139   InitSlidingParamConvDw(sliding_, conv_param_, conv_param_->input_channel_);
140   conv_param_->thread_num_ = MSMIN(thread_count_, conv_param_->output_h_);
141   return RET_OK;
142 }
DoExecute(int task_id)144 int ConvolutionDepthwise3x3Int8CPUKernel::DoExecute(int task_id) {
145   auto buffer = buffer_ + kConvDepthwise3x3BufferSize * task_id;
146   ConvDw3x3Int8(output_ptr_, buffer, input_ptr_, packed_weight_, reinterpret_cast<int32_t *>(bias_data_), conv_param_,
147                 sliding_, task_id);
148   return RET_OK;
149 }
ConvDw3x3Int8Run(void * cdata,int task_id,float lhs_scale,float rhs_scale)151 int ConvDw3x3Int8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
152   auto conv_dw_int8 = reinterpret_cast<ConvolutionDepthwise3x3Int8CPUKernel *>(cdata);
153   auto ret = conv_dw_int8->DoExecute(task_id);
154   if (ret != RET_OK) {
155     MS_LOG(ERROR) << "ConvolutionDepthwise3x3Int8Run error task_id[" << task_id << "] error_code[" << ret << "]";
156     return RET_ERROR;
157   }
158   return RET_OK;
159 }
InitBuffer()161 int ConvolutionDepthwise3x3Int8CPUKernel::InitBuffer() {
162   int buffer_size = kConvDepthwise3x3BufferSize * conv_param_->thread_num_;
163   buffer_ =
164     reinterpret_cast<int8_t *>(ms_context_->allocator->Malloc(static_cast<size_t>(buffer_size) * sizeof(int8_t)));
165   if (buffer_ == nullptr) {
166     MS_LOG(ERROR) << "Malloc buffer failed.";
167     return RET_ERROR;
168   }
169   return RET_OK;
170 }
Run()172 int ConvolutionDepthwise3x3Int8CPUKernel::Run() {
173   auto ret = InitBuffer();
174   if (ret != RET_OK) {
175     MS_LOG(ERROR) << "Depthwise int8 ReSize error!";
176     return ret;
177   }
178 
179   auto input_tensor = in_tensors_.at(kInputIndex);
180   CHECK_NULL_RETURN(input_tensor);
181   input_ptr_ = reinterpret_cast<int8_t *>(input_tensor->MutableData());
182   CHECK_NULL_RETURN(input_ptr_);
183 
184   auto output_tensor = out_tensors_.at(kOutputIndex);
185   CHECK_NULL_RETURN(output_tensor);
186   output_ptr_ = reinterpret_cast<int8_t *>(output_tensor->MutableData());
187   CHECK_NULL_RETURN(output_ptr_);
188 
189   if (sliding_->top_ > 0 || sliding_->bottom_ < conv_param_->output_h_ || sliding_->left_ > 0 ||
190       sliding_->right_ < conv_param_->output_w_) {
191     ConvDw3x3Int8Pad(output_ptr_, input_ptr_, packed_weight_, reinterpret_cast<int32_t *>(bias_data_), conv_param_,
192                      sliding_);
193   }
194   ret = ParallelLaunch(this->ms_context_, ConvDw3x3Int8Run, this, conv_param_->thread_num_);
195   if (ret != RET_OK) {
196     ms_context_->allocator->Free(buffer_);
197     MS_LOG(ERROR) << "ConvDwInt8Run error: error_code[" << ret << "]";
198     return RET_ERROR;
199   }
200   ms_context_->allocator->Free(buffer_);
201   return RET_OK;
202 }
}  // namespace mindspore::kernel