/**
 * Copyright 2023 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "nnacl/kernel/convolution_delegate.h"
#include "nnacl/conv_parameter.h"
#include "nnacl/tensor_c_utils.h"
#include "nnacl/base/conv_common_base.h"
#include "nnacl/kernel/group_convolution.h"
#include "nnacl/kernel/convolution_depthwise.h"
#include "nnacl/kernel/convolution_1x1.h"
#include "nnacl/kernel/convolution_im2col.h"
#include "nnacl/kernel/convolution_winograd.h"
#include "nnacl/fp32/conv_winograd_fp32.h"
#include "nnacl/kernel/convolution_depthwise_sw.h"
#ifdef ENABLE_AVX
#include "nnacl/kernel/convolution_sw_1x1.h"
#include "nnacl/kernel/convolution_sw_avx.h"
#include "nnacl/kernel/convolution_depthwise_sw_avx.h"
#endif
#ifdef ENABLE_ARM64
#include "nnacl/kernel/convolution_depthwise_indirect.h"
#include "nnacl/kernel/convolution_sw_arm64.h"
#include "nnacl/fp32/conv_sw_arm64_fp32.h"
#endif
#if defined(ENABLE_ARM) || (defined(ENABLE_SSE) && !defined(ENABLE_AVX))
#include "nnacl/kernel/convolution_depthwise_3x3.h"
#include "nnacl/fp32/conv_depthwise_fp32.h"
#endif

#define MaxDwConvSWSize 32

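/* Allocate a fresh buffer and deep-copy the tensor's float data into it.
 * Returns NULL if the tensor, its data, or the allocation is invalid;
 * the caller owns the returned buffer and must free() it. */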
float *ConvolutionDelegateCopyData(const TensorC *tensor) {
  NNACL_CHECK_NULL_RETURN_NULL(tensor);
  NNACL_CHECK_NULL_RETURN_NULL(tensor->data_);

  float *data = (float *)malloc(GetSize(tensor));
  NNACL_MALLOC_CHECK_NULL_RETURN_NULL(data);

  (void)memcpy(data, tensor->data_, GetSize(tensor));
  return data;
}

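/* Capture the convolution weights. If shape inference has already completed,
 * reference the weight tensor's data in place; otherwise take a private copy
 * (released later in Resize()), with need_free_weight_ tracking ownership. */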
int ConvolutionDelegateGetWeightData(ConvolutionDelegateStruct *convolution_delegate) {
  if (convolution_delegate->conv_.base_.in_[SECOND_INPUT]->data_ == NULL) {
    return NNACL_OK;
  }
  if (convolution_delegate->conv_.infershape_done_) {
    convolution_delegate->origin_weight_ = convolution_delegate->conv_.base_.in_[SECOND_INPUT]->data_;
    NNACL_CHECK_NULL_RETURN_ERR(convolution_delegate->origin_weight_);
    convolution_delegate->need_free_weight_ = false;
    return NNACL_OK;
  }
  convolution_delegate->origin_weight_ =
    ConvolutionDelegateCopyData(convolution_delegate->conv_.base_.in_[SECOND_INPUT]);
  NNACL_MALLOC_CHECK_NULL_RETURN_ERR(convolution_delegate->origin_weight_);
  convolution_delegate->need_free_weight_ = true;
  return NNACL_OK;
}

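/* Capture the optional bias. A two-tensor convolution has no bias; when a
 * third input is present, reference it in place after shape inference or
 * copy it otherwise, mirroring the weight-handling logic above. */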
int ConvolutionDelegateGetBiasData(ConvolutionDelegateStruct *convolution_delegate) {
  if (convolution_delegate->conv_.base_.in_size_ != THREE_TENSOR) {
    convolution_delegate->origin_bias_ = NULL;
    convolution_delegate->need_free_bias_ = false;
    return NNACL_OK;
  }

  if (convolution_delegate->conv_.infershape_done_) {
    NNACL_CHECK_NULL_RETURN_ERR(convolution_delegate->conv_.base_.in_[THIRD_INPUT]);
    convolution_delegate->origin_bias_ = convolution_delegate->conv_.base_.in_[THIRD_INPUT]->data_;
    NNACL_CHECK_NULL_RETURN_ERR(convolution_delegate->origin_bias_);
    convolution_delegate->need_free_bias_ = false;
    return NNACL_OK;
  }

  convolution_delegate->origin_bias_ = ConvolutionDelegateCopyData(convolution_delegate->conv_.base_.in_[THIRD_INPUT]);
  NNACL_MALLOC_CHECK_NULL_RETURN_ERR(convolution_delegate->origin_bias_);
  convolution_delegate->need_free_bias_ = true;
  return NNACL_OK;
}

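/* Fetch weights first, then bias; fail fast on the first error. */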
int ConvolutionDelegateGetWeightAndBias(ConvolutionDelegateStruct *convolution_delegate) {
  int ret = ConvolutionDelegateGetWeightData(convolution_delegate);
  if (ret != NNACL_OK) {
    return ret;
  }

  return ConvolutionDelegateGetBiasData(convolution_delegate);
}

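/* Kernel selection for NC4HW4-formatted output. Returns a kernel only on
 * builds with an NC4-capable implementation; otherwise NULL, which the
 * caller treats as a selection failure. */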
ConvolutionBaseStruct *ConvolutionDelegateConvNC4KernelSelect(ConvolutionDelegateStruct *convolution_delegate) {
  /* runtime NC4HW4 pass
   * arm64: conv1x1 and conv_im2col support NC4
   * AVX: conv_im2col supports NC4
   */
  ConvParameter *conv_param = (ConvParameter *)convolution_delegate->conv_.base_.param_;
  NNACL_CHECK_NULL_RETURN_NULL(conv_param);

#ifdef ENABLE_ARM64
  if (conv_param->kernel_h_ == 1 && conv_param->kernel_w_ == 1) {
    ConvolutionBaseStruct *conv1x1 = CreateConvolution1x1(conv_param);
    return conv1x1;
  }
#endif

#if defined(ENABLE_ARM64) || defined(ENABLE_AVX)
  ConvolutionBaseStruct *conv_im2col = CreateConvolutionIm2Col(&convolution_delegate->conv_.base_, conv_param);
  return conv_im2col;
#endif

  return NULL;
}

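/* Kernel selection for NHWC output, tried in order: Winograd, then the
 * platform sliding-window variants (AVX 1x1 and SW, arm64 SW), and finally
 * the generic conv1x1 / im2col fallback. */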
ConvolutionBaseStruct *ConvolutionDelegateConvNHWCKernelSelect(ConvolutionDelegateStruct *convolution_delegate) {
  ConvParameter *conv_param = (ConvParameter *)convolution_delegate->conv_.base_.param_;
  NNACL_CHECK_NULL_RETURN_NULL(conv_param);

  ConvolutionBaseStruct *conv = NULL;

  int out_unit;
  if (CheckIfUseWinograd(&out_unit, conv_param)) {
    conv = CreateConvolutionWinograd(conv_param, out_unit);
  }

#ifdef ENABLE_AVX
  if (conv == NULL && CheckAvxUseSW1x1Conv(conv_param)) {
    conv = CreateConvolutionSW1x1(conv_param, convolution_delegate->input_const_, convolution_delegate->weight_const_);
  }

  if (conv == NULL && CheckAvxUseSWConv(conv_param, convolution_delegate->conv_.base_.thread_nr_)) {
    conv = CreateConvolutionSWAVX(conv_param);
  }
#endif

#ifdef ENABLE_ARM64
  if (conv == NULL && CheckArm64UseSWConv(conv_param)) {
    conv = CreateConvolutionSWARM64(conv_param);
  }
#endif

  if (conv == NULL) {
    if (conv_param->kernel_h_ == 1 && conv_param->kernel_w_ == 1) {
      conv = CreateConvolution1x1(conv_param);
    } else {
      conv = CreateConvolutionIm2Col(&convolution_delegate->conv_.base_, conv_param);
    }
  }
  return conv;
}

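/* Select the concrete kernel by output format, then hand it the delegate's
 * runtime context (callbacks, tensors, thread count, weight-sharing hooks)
 * so it can run in the delegate's place. */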
ConvolutionBaseStruct *ConvolutionDelegateConvolutionSelect(ConvolutionDelegateStruct *convolution_delegate) {
  ConvolutionBaseStruct *conv;
  if (convolution_delegate->conv_.base_.out_[OUTPUT_INDEX]->format_ == Format_NC4HW4) {
    conv = ConvolutionDelegateConvNC4KernelSelect(convolution_delegate);
  } else {
    conv = ConvolutionDelegateConvNHWCKernelSelect(convolution_delegate);
  }
  if (conv == NULL) {
    return NULL;
  }

  conv->base_.InferShape = convolution_delegate->conv_.base_.InferShape;
  conv->base_.UpdateThread = convolution_delegate->conv_.base_.UpdateThread;
  conv->base_.env_ = convolution_delegate->conv_.base_.env_;
  conv->base_.param_ = convolution_delegate->conv_.base_.param_;
  conv->base_.thread_nr_ = convolution_delegate->conv_.base_.thread_nr_;
  conv->base_.train_session_ = convolution_delegate->conv_.base_.train_session_;
  conv->base_.in_ = convolution_delegate->conv_.base_.in_;
  conv->base_.in_size_ = convolution_delegate->conv_.base_.in_size_;
  conv->base_.out_ = convolution_delegate->conv_.base_.out_;
  conv->base_.out_size_ = convolution_delegate->conv_.base_.out_size_;

  conv->infershape_done_ = convolution_delegate->conv_.infershape_done_;
  conv->shaing_manager_ = convolution_delegate->conv_.shaing_manager_;
  conv->get_sharing_weight_ = convolution_delegate->conv_.get_sharing_weight_;
  conv->free_sharing_weight_ = convolution_delegate->conv_.free_sharing_weight_;
  conv->is_sharing_pack_ = convolution_delegate->conv_.is_sharing_pack_;

  conv->origin_weight_ = convolution_delegate->origin_weight_;
  conv->origin_bias_ = convolution_delegate->origin_bias_;
  return conv;
}

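/* Release the weight/bias copies taken before shape inference. Data that
 * merely references an input tensor (need_free_* == false) is not freed. */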
void ConvolutionDelegateFreeCopiedData(ConvolutionDelegateStruct *convolution_delegate) {
  if (convolution_delegate->origin_weight_ != NULL && convolution_delegate->need_free_weight_) {
    free(convolution_delegate->origin_weight_);
  }
  convolution_delegate->origin_weight_ = NULL;
  convolution_delegate->conv_.origin_weight_ = NULL;
  convolution_delegate->need_free_weight_ = false;

  if (convolution_delegate->origin_bias_ != NULL && convolution_delegate->need_free_bias_) {
    free(convolution_delegate->origin_bias_);
  }
  convolution_delegate->origin_bias_ = NULL;
  convolution_delegate->conv_.origin_bias_ = NULL;
  convolution_delegate->need_free_bias_ = false;
}

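/* On the first Resize() the concrete kernel is lazily selected and prepared;
 * every Resize() then refreshes the compute info and resizes the kernel.
 * The copied weight/bias buffers are released once the kernel has them. */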
int ConvolutionDelegateResize(struct KernelBase *self) {
  ConvolutionDelegateStruct *convolution_delegate = (ConvolutionDelegateStruct *)self;
  NNACL_CHECK_NULL_RETURN_ERR(convolution_delegate);

  if (convolution_delegate->convolution_ == NULL) {
    convolution_delegate->convolution_ = ConvolutionDelegateConvolutionSelect(convolution_delegate);
    NNACL_MALLOC_CHECK_NULL_RETURN_ERR(convolution_delegate->convolution_);
    (void)ConvBaseUpdateComputeInfo(convolution_delegate->convolution_);
    int ret = convolution_delegate->convolution_->base_.Prepare(&convolution_delegate->convolution_->base_);
    if (ret != NNACL_OK) {
      return ret;
    }
  }

  (void)ConvBaseUpdateComputeInfo(convolution_delegate->convolution_);
  int ret = convolution_delegate->convolution_->base_.Resize(&convolution_delegate->convolution_->base_);
  if (ret != NNACL_OK) {
    return ret;
  }

  ConvolutionDelegateFreeCopiedData(convolution_delegate);
  return NNACL_OK;
}

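/* Validate tensor counts and data types (fp32/fp16 weights, fp32 bias),
 * record whether input and weights are constant, and stash weight/bias
 * data for the kernel chosen later in Resize(). */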
int ConvolutionDelegatePrepare(struct KernelBase *self) {
  ConvolutionDelegateStruct *convolution_delegate = (ConvolutionDelegateStruct *)self;
  NNACL_CHECK_NULL_RETURN_ERR(convolution_delegate);

  NNACL_CHECK_FALSE(self->in_size_ < TWO_TENSOR, NNACL_INPUT_TENSOR_ERROR);
  NNACL_CHECK_FALSE(self->out_size_ < ONE_TENSOR, NNACL_OUTPUT_TENSOR_ERROR);
  NNACL_CHECK_NULL_RETURN_ERR(self->in_[SECOND_INPUT]);

  NNACL_CHECK_FALSE(self->in_[SECOND_INPUT]->data_type_ != kNumberTypeFloat32 &&
                      self->in_[SECOND_INPUT]->data_type_ != kNumberTypeFloat16,
                    NNACL_CONVOLUTION_WEIGHT_DATATYPE_INVALID);
  NNACL_CHECK_FALSE(self->in_size_ == THREE_TENSOR && self->in_[THIRD_INPUT] != NULL &&
                      self->in_[THIRD_INPUT]->data_type_ != kNumberTypeFloat32,
                    NNACL_CONVOLUTION_BIAS_DATATYPE_INVALID);

  convolution_delegate->input_const_ = IsConst(self->in_[FIRST_INPUT]) && !self->train_session_;
  convolution_delegate->weight_const_ = IsConst(self->in_[SECOND_INPUT]) && !self->train_session_;

  return ConvolutionDelegateGetWeightAndBias(convolution_delegate);
}

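/* Release and free the wrapped kernel, if one was created. */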
int ConvolutionDelegateRelease(struct KernelBase *self) {
  ConvolutionDelegateStruct *convolution_delegate = (ConvolutionDelegateStruct *)self;
  NNACL_CHECK_NULL_RETURN_ERR(convolution_delegate);
  if (convolution_delegate->convolution_ != NULL) {
    (void)convolution_delegate->convolution_->base_.Release(&convolution_delegate->convolution_->base_);
    free(convolution_delegate->convolution_);
    convolution_delegate->convolution_ = NULL;
  }
  return NNACL_OK;
}

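/* Forward Compute() to the selected kernel, passing the delegate's
 * workspace through. */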
int ConvolutionDelegateCompute(struct KernelBase *self) {
  ConvolutionDelegateStruct *convolution_delegate = (ConvolutionDelegateStruct *)self;
  NNACL_CHECK_NULL_RETURN_ERR(convolution_delegate);
  NNACL_CHECK_NULL_RETURN_ERR(convolution_delegate->convolution_);

  convolution_delegate->convolution_->base_.workspace_ = convolution_delegate->conv_.base_.workspace_;
  return convolution_delegate->convolution_->base_.Compute(&convolution_delegate->convolution_->base_);
}

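/* Create the delegate wrapper. Concrete kernel selection is deferred to
 * Resize(), when tensor shapes are known. */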
KernelBase *CreateConvlutionDelegate(ConvParameter *conv_param) {
  ConvolutionDelegateStruct *delegate = (ConvolutionDelegateStruct *)malloc(sizeof(ConvolutionDelegateStruct));
  NNACL_MALLOC_CHECK_NULL_RETURN_NULL(delegate);
  memset(delegate, 0, sizeof(ConvolutionDelegateStruct));
  delegate->conv_.base_.Prepare = ConvolutionDelegatePrepare;
  delegate->conv_.base_.Resize = ConvolutionDelegateResize;
  delegate->conv_.base_.Release = ConvolutionDelegateRelease;
  delegate->conv_.base_.Compute = ConvolutionDelegateCompute;
  return (KernelBase *)delegate;
}

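/* Depthwise convolution dispatch, tried in order: the generic kernel for
 * dynamic shapes, AVX sliding window, 3x3 1D-Winograd (ARM/SSE), arm64
 * indirect buffer, generic sliding window for channel counts below
 * MaxDwConvSWSize, and finally the plain depthwise kernel. */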
KernelBase *CreateConvolutionDepthwise(ConvParameter *conv_param) {
  NNACL_CHECK_NULL_RETURN_NULL(conv_param);
  KernelBase *kernel = NULL;

  if (conv_param->dynamic_shape_) {
    kernel = CreateConvDw(conv_param);
    if (kernel != NULL) {
      return kernel;
    }
  }

#ifdef ENABLE_AVX
  kernel = CreateConvDwSWAVX(conv_param);
  if (kernel != NULL) {
    return kernel;
  }
#endif

#if defined(ENABLE_ARM) || (defined(ENABLE_SSE) && !defined(ENABLE_AVX))
  if (CheckConvDw1DWinograd(conv_param, conv_param->thread_num_)) {
    kernel = CreateConvDw3x3(conv_param);
    if (kernel != NULL) {
      return kernel;
    }
  }
#endif

#ifdef ENABLE_ARM64
  if (CheckConvDwUseIndirectBuffer(conv_param)) {
    kernel = CreateConvDwIndirect(conv_param);
    if (kernel != NULL) {
      return kernel;
    }
  }
#endif

  if (conv_param->input_channel_ < MaxDwConvSWSize) {
    kernel = CreateConvDwSW(conv_param);
    if (kernel != NULL) {
      return kernel;
    }
  }

  kernel = CreateConvDw(conv_param);
  return kernel;
}

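/* Entry point registered for Conv2DFusion: group == 1 maps to the delegate,
 * group == in_channels == out_channels to depthwise, anything else to
 * group convolution. */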
KernelBase *CreateConv2DFusion(OpParameter *param, int data_type) {
  ConvParameter *conv_param = (ConvParameter *)param;
  conv_param->thread_num_ = param->thread_num_;
  KernelBase *kernel;
  if (conv_param->group_ == 1) {
    kernel = CreateConvlutionDelegate(conv_param);
  } else if (conv_param->group_ == conv_param->input_channel_ && conv_param->group_ == conv_param->output_channel_) {
    kernel = CreateConvolutionDepthwise(conv_param);
  } else {
    kernel = CreateGroupConvolution(conv_param, data_type);
  }

  if (kernel == NULL) {
    return NULL;
  }

  ConvolutionBaseStruct *conv = (ConvolutionBaseStruct *)kernel;
  (void)ConvBaseUpdateParamInfo(&conv->compute_, conv_param);

  return kernel;
}

REG_KERNEL_CREATOR(PrimType_Conv2DFusion, kNumberTypeFloat32, CreateConv2DFusion)