• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "src/runtime/kernel/arm/int8/deconvolution_depthwise_int8.h"
18 #include "include/errorcode.h"
19 #include "nnacl/int8/conv_depthwise_int8.h"
20 
21 using mindspore::lite::RET_ERROR;
22 using mindspore::lite::RET_OK;
23 
24 namespace mindspore::kernel {
~DeconvolutionDepthwiseInt8CPUKernel()25 DeconvolutionDepthwiseInt8CPUKernel::~DeconvolutionDepthwiseInt8CPUKernel() {
26   if (sliding_ != nullptr) {
27     delete sliding_;
28     sliding_ = nullptr;
29   }
30   if (packed_weight_ != nullptr) {
31     delete packed_weight_;
32     packed_weight_ = nullptr;
33   }
34   FreeQuantParam();
35 }
36 
InitWeightBias()37 int DeconvolutionDepthwiseInt8CPUKernel::InitWeightBias() {
38   // init weight: int8 -> int16
39   // o, h, w, i -> o/8, h, w, i, 8; o equals to group, i equals to 1
40   auto weight_tensor = in_tensors_.at(kWeightIndex);
41   CHECK_NULL_RETURN(weight_tensor);
42   auto origin_weight = reinterpret_cast<int8_t *>(weight_tensor->data());
43   CHECK_NULL_RETURN(origin_weight);
44   if (origin_weight == nullptr) {
45     MS_LOG(ERROR) << "origin_weight nullptr";
46     return RET_ERROR;
47   }
48   int OC4 = UP_DIV(weight_tensor->Batch(), C4NUM);
49   int pack_weight_size = C4NUM * OC4 * weight_tensor->Height() * weight_tensor->Width();
50   packed_weight_ = reinterpret_cast<int16_t *>(malloc(pack_weight_size * sizeof(int16_t)));
51   if (packed_weight_ == nullptr) {
52     MS_LOG(ERROR) << "Malloc buffer failed.";
53     return RET_ERROR;
54   }
55   PackDeconvDepthwiseInt8Weight(origin_weight, packed_weight_, weight_tensor->Height() * weight_tensor->Width(),
56                                 weight_tensor->Batch(), &(conv_param_->conv_quant_arg_));
57 
58   bias_data_ = reinterpret_cast<int32_t *>(malloc(C4NUM * OC4 * sizeof(int32_t)));
59   if (bias_data_ == nullptr) {
60     MS_LOG(ERROR) << "Malloc buffer failed.";
61     return RET_ERROR;
62   }
63   memset(bias_data_, 0, C4NUM * OC4 * sizeof(int32_t));
64   if (in_tensors_.size() == kInputSize2) {
65     auto bias_tensor = in_tensors_.at(kBiasIndex);
66     CHECK_NULL_RETURN(bias_tensor);
67     auto ori_bias = reinterpret_cast<int32_t *>(bias_tensor->data());
68     CHECK_NULL_RETURN(ori_bias);
69     memcpy(bias_data_, ori_bias, bias_tensor->ElementsNum() * sizeof(int32_t));
70   }
71   conv_param_->thread_num_ = MSMIN(thread_count_, OC4);
72   return RET_OK;
73 }
74 
int DeconvolutionDepthwiseInt8CPUKernel::InitSlideParam() {
  // Configure the sliding-window parameters. Deconvolution is computed as the
  // transpose of a convolution, so input and output roles are swapped below:
  // conv_param_->input_* is filled from the OUTPUT tensor shape and
  // conv_param_->output_* from the INPUT tensor shape.
  MS_CHECK_TRUE_RET(in_tensors_.front()->shape().size() == DIMENSION_4D, RET_ERROR);
  MS_CHECK_TRUE_RET(out_tensors_.front()->shape().size() == DIMENSION_4D, RET_ERROR);

  conv_param_->input_batch_ = out_tensors_.front()->shape().at(kNHWC_N);
  conv_param_->input_h_ = out_tensors_.front()->shape().at(kNHWC_H);
  conv_param_->input_w_ = out_tensors_.front()->shape().at(kNHWC_W);
  conv_param_->input_channel_ = C4NUM;
  conv_param_->output_batch_ = in_tensors_.front()->shape().at(kNHWC_N);
  conv_param_->output_h_ = in_tensors_.front()->shape().at(kNHWC_H);
  conv_param_->output_w_ = in_tensors_.front()->shape().at(kNHWC_W);
  conv_param_->output_channel_ = in_tensors_.front()->shape().at(kNHWC_C);

  InitSlidingParamConvDw(sliding_, conv_param_, C4NUM);

  // Step sizes over the (swapped) input, in elements of the packed buffer.
  sliding_->in_h_step_ = conv_param_->input_w_ * C4NUM;
  sliding_->in_sh_step_ = conv_param_->input_w_ * C4NUM * conv_param_->stride_h_;    // stride H
  // NOTE(review): in_sw_step_ is computed with stride_h_ although the comment
  // says "stride W" — presumably this should be stride_w_ and is harmless only
  // when both strides are equal. TODO: confirm against the nnacl DeconvDwInt8
  // implementation before changing.
  sliding_->in_sw_step_ = C4NUM * conv_param_->stride_h_;                            // stride W
  sliding_->in_kh_step_ = conv_param_->input_w_ * C4NUM * conv_param_->dilation_h_;  // kernel H
  sliding_->in_kw_step_ = C4NUM * conv_param_->dilation_w_;                          // kernel W
  return RET_OK;
}
97 
InitBuffer()98 int DeconvolutionDepthwiseInt8CPUKernel::InitBuffer() {
99   int pack_input_size = conv_param_->input_batch_ * conv_param_->input_h_ * conv_param_->input_w_ * C4NUM *
100                         UP_DIV(conv_param_->input_channel_, C4NUM);
101   packed_input_ = reinterpret_cast<int16_t *>(ms_context_->allocator->Malloc(pack_input_size * sizeof(int16_t)));
102   if (packed_input_ == nullptr) {
103     MS_LOG(ERROR) << "Malloc buffer failed.";
104     return RET_ERROR;
105   }
106 
107   if (conv_param_->input_channel_ % C4NUM != 0) {
108     need_align_ = true;
109     int pack_output_size = conv_param_->output_batch_ * conv_param_->output_h_ * conv_param_->output_w_ * C4NUM *
110                            UP_DIV(conv_param_->output_channel_, C4NUM);
111     packed_output_ = reinterpret_cast<int8_t *>(ms_context_->allocator->Malloc(pack_output_size * sizeof(int8_t)));
112     if (packed_output_ == nullptr) {
113       MS_LOG(ERROR) << "Malloc buffer failed.";
114       return RET_ERROR;
115     }
116     memset(packed_output_, 0, pack_output_size * sizeof(int8_t));
117   }
118 
119   output_buffer_ = reinterpret_cast<int32_t *>(ms_context_->allocator->Malloc(
120     conv_param_->output_h_ * conv_param_->output_w_ * C4NUM * conv_param_->thread_num_ * sizeof(int32_t)));
121   if (output_buffer_ == nullptr) {
122     MS_LOG(ERROR) << "Malloc buffer failed.";
123     return RET_ERROR;
124   }
125   return RET_OK;
126 }
127 
Init()128 int DeconvolutionDepthwiseInt8CPUKernel::Init() {
129   CHECK_LESS_RETURN(in_tensors_.size(), C2NUM);
130   CHECK_NULL_RETURN(in_tensors_.at(kInputIndex));
131   CHECK_NULL_RETURN(in_tensors_.at(kWeightIndex));
132   CHECK_NULL_RETURN(conv_param_);
133 
134   sliding_ = new (std::nothrow) SlidingWindowParam;
135   if (sliding_ == nullptr) {
136     MS_LOG(ERROR) << "new SlidingWindowParam fail!";
137     return RET_ERROR;
138   }
139   auto ret = ConvolutionBaseCPUKernel::SetQuantParam();
140   if (ret != RET_OK) {
141     MS_LOG(ERROR) << "Set quant param failed.";
142     return ret;
143   }
144   ret = InitWeightBias();
145   if (ret != RET_OK) {
146     MS_LOG(ERROR) << "Deconv Depthwise int8 InitWeightBias error!";
147     return ret;
148   }
149   if (!InferShapeDone()) {
150     return RET_OK;
151   }
152   return ReSize();
153 }
154 
ReSize()155 int DeconvolutionDepthwiseInt8CPUKernel::ReSize() {
156   CHECK_LESS_RETURN(in_tensors_.size(), 1);
157   CHECK_LESS_RETURN(out_tensors_.size(), 1);
158   CHECK_NULL_RETURN(in_tensors_.front());
159   CHECK_NULL_RETURN(out_tensors_.front());
160   CHECK_NULL_RETURN(conv_param_);
161   CHECK_NULL_RETURN(sliding_);
162 
163   InitSlideParam();
164   ConvolutionBaseCPUKernel::Init();
165   return RET_OK;
166 }
167 
DoExecute(int task_id)168 int DeconvolutionDepthwiseInt8CPUKernel::DoExecute(int task_id) {
169   auto buffer = output_buffer_ + conv_param_->output_h_ * conv_param_->output_w_ * C4NUM * task_id;
170   DeconvDwInt8(packed_output_, buffer, packed_input_, packed_weight_, reinterpret_cast<int32_t *>(bias_data_),
171                conv_param_, sliding_, task_id);
172   return RET_OK;
173 }
174 
DeconvDwInt8Run(void * cdata,int task_id,float lhs_scale,float rhs_scale)175 int DeconvDwInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
176   auto deconv_dw_int8 = reinterpret_cast<DeconvolutionDepthwiseInt8CPUKernel *>(cdata);
177   auto ret = deconv_dw_int8->DoExecute(task_id);
178   if (ret != RET_OK) {
179     MS_LOG(ERROR) << "DeconvolutionDepthwiseInt8Run error task_id[" << task_id << "] error_code[" << ret << "]";
180     return RET_ERROR;
181   }
182   return RET_OK;
183 }
184 
Run()185 int DeconvolutionDepthwiseInt8CPUKernel::Run() {
186   if (conv_param_->input_channel_ != conv_param_->output_channel_) {
187     MS_LOG(ERROR) << "Only support input channel equals output channel.";
188     return RET_ERROR;
189   }
190   auto ret = InitBuffer();
191   if (ret != RET_OK) {
192     MS_LOG(ERROR) << "Deconv Depthwise int8 InitBuffer error!";
193     ms_context_->allocator->Free(packed_input_);
194     packed_input_ = nullptr;
195     ms_context_->allocator->Free(output_buffer_);
196     output_buffer_ = nullptr;
197     if (need_align_) {
198       ms_context_->allocator->Free(packed_output_);
199     }
200     return ret;
201   }
202 
203   CHECK_NULL_RETURN(packed_weight_);
204   CHECK_NULL_RETURN(bias_data_);
205 
206   auto input_tensor = in_tensors_.at(kInputIndex);
207   auto output_tensor = out_tensors_.at(kOutputIndex);
208   auto input_addr = reinterpret_cast<int8_t *>(input_tensor->data());
209   auto output_addr = reinterpret_cast<int8_t *>(output_tensor->data());
210   CHECK_NULL_RETURN(input_addr);
211   CHECK_NULL_RETURN(output_addr);
212 
213   // pack input, assume input format: NHWC -> NHWC4
214   PackDepthwiseInt8Input(input_addr, packed_input_, conv_param_);
215 
216   if (!need_align_) {
217     memset(output_addr, 0, out_tensors_.at(kOutputIndex)->ElementsNum() * sizeof(int8_t));
218     packed_output_ = output_addr;
219   }
220 
221   ret = ParallelLaunch(this->ms_context_, DeconvDwInt8Run, this, conv_param_->thread_num_);
222   if (ret != RET_OK) {
223     MS_LOG(ERROR) << "DeconvDwInt8Run error: error_code[" << ret << "]";
224   }
225 
226   if (need_align_) {
227     PackNHWC4ToNHWCInt8(packed_output_, output_addr, conv_param_->output_batch_,
228                         conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_);
229     ms_context_->allocator->Free(packed_output_);
230     packed_output_ = nullptr;
231   }
232   ms_context_->allocator->Free(packed_input_);
233   packed_input_ = nullptr;
234   ms_context_->allocator->Free(output_buffer_);
235   output_buffer_ = nullptr;
236   return ret;
237 }
238 }  // namespace mindspore::kernel
239