/**
 * Copyright 2023 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "nnacl/kernel/convolution_delegate.h"
#include "nnacl/conv_parameter.h"
#include "nnacl/tensor_c_utils.h"
#include "nnacl/base/conv_common_base.h"
#include "nnacl/kernel/group_convolution.h"
#include "nnacl/kernel/convolution_depthwise.h"
#include "nnacl/kernel/convolution_1x1.h"
#include "nnacl/kernel/convolution_im2col.h"
#include "nnacl/kernel/convolution_winograd.h"
#include "nnacl/fp32/conv_winograd_fp32.h"
#include "nnacl/kernel/convolution_depthwise_sw.h"
#ifdef ENABLE_AVX
#include "nnacl/kernel/convolution_sw_1x1.h"
#include "nnacl/kernel/convolution_sw_avx.h"
#include "nnacl/kernel/convolution_depthwise_sw_avx.h"
#endif
#ifdef ENABLE_ARM64
#include "nnacl/kernel/convolution_depthwise_indirect.h"
#include "nnacl/kernel/convolution_sw_arm64.h"
#include "nnacl/fp32/conv_sw_arm64_fp32.h"
#endif
#if defined(ENABLE_ARM) || (defined(ENABLE_SSE) && !defined(ENABLE_AVX))
#include "nnacl/kernel/convolution_depthwise_3x3.h"
#include "nnacl/fp32/conv_depthwise_fp32.h"
#endif

#define MaxDwConvSWSize 32

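/* Make a heap copy of a tensor's data so the delegate can keep the original
 * weight/bias values alive independently of the source tensor. The caller
 * owns the returned buffer and must free() it. */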
float *ConvolutionDelegateCopyData(const TensorC *tensor) {
  NNACL_CHECK_NULL_RETURN_NULL(tensor);
  NNACL_CHECK_NULL_RETURN_NULL(tensor->data_);

  float *data = (float *)malloc(GetSize(tensor));
  NNACL_MALLOC_CHECK_NULL_RETURN_NULL(data);

  (void)memcpy(data, tensor->data_, GetSize(tensor));
  return data;
}

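/* Record the origin weight for later packing. Once infer-shape has run, the
 * tensor's data pointer is stable and is borrowed directly; before that the
 * data is copied so it survives until the concrete kernel is selected. */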
int ConvolutionDelegateGetWeightData(ConvolutionDelegateStruct *convolution_delegate) {
  if (convolution_delegate->conv_.base_.in_[SECOND_INPUT]->data_ == NULL) {
    return NNACL_OK;
  }
  if (convolution_delegate->conv_.infershape_done_) {
    convolution_delegate->origin_weight_ = convolution_delegate->conv_.base_.in_[SECOND_INPUT]->data_;
    NNACL_CHECK_NULL_RETURN_ERR(convolution_delegate->origin_weight_);
    convolution_delegate->need_free_weight_ = false;
    return NNACL_OK;
  }
  convolution_delegate->origin_weight_ =
    ConvolutionDelegateCopyData(convolution_delegate->conv_.base_.in_[SECOND_INPUT]);
  NNACL_MALLOC_CHECK_NULL_RETURN_ERR(convolution_delegate->origin_weight_);
  convolution_delegate->need_free_weight_ = true;
  return NNACL_OK;
}

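/* Record the origin bias the same way as the weight. A model without a bias
 * input (in_size_ != THREE_TENSOR) is valid: the bias is simply left NULL. */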
int ConvolutionDelegateGetBiasData(ConvolutionDelegateStruct *convolution_delegate) {
  if (convolution_delegate->conv_.base_.in_size_ != THREE_TENSOR) {
    convolution_delegate->origin_bias_ = NULL;
    convolution_delegate->need_free_bias_ = false;
    return NNACL_OK;
  }

  if (convolution_delegate->conv_.infershape_done_) {
    NNACL_CHECK_NULL_RETURN_ERR(convolution_delegate->conv_.base_.in_[THIRD_INPUT]);
    convolution_delegate->origin_bias_ = convolution_delegate->conv_.base_.in_[THIRD_INPUT]->data_;
    NNACL_CHECK_NULL_RETURN_ERR(convolution_delegate->origin_bias_);
    convolution_delegate->need_free_bias_ = false;
    return NNACL_OK;
  }

  convolution_delegate->origin_bias_ = ConvolutionDelegateCopyData(convolution_delegate->conv_.base_.in_[THIRD_INPUT]);
  NNACL_MALLOC_CHECK_NULL_RETURN_ERR(convolution_delegate->origin_bias_);
  convolution_delegate->need_free_bias_ = true;
  return NNACL_OK;
}

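/* Fetch weight then bias; returns the first error encountered. */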
int ConvolutionDelegateGetWeightAndBias(ConvolutionDelegateStruct *convolution_delegate) {
  int ret = ConvolutionDelegateGetWeightData(convolution_delegate);
  if (ret != NNACL_OK) {
    return ret;
  }

  return ConvolutionDelegateGetBiasData(convolution_delegate);
}

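/* Kernel selection for NC4HW4-formatted output. Only conv1x1 (arm64) and
 * conv_im2col (arm64/AVX) can produce the NC4 layout; other builds get NULL. */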
ConvolutionBaseStruct *ConvolutionDelegateConvNC4KernelSelect(ConvolutionDelegateStruct *convolution_delegate) {
  /* runtime nc4hw4 pass
   * arm64: conv1x1 and conv_im2col support NC4
   * AVX: conv_im2col supports NC4
   */
  ConvParameter *conv_param = (ConvParameter *)convolution_delegate->conv_.base_.param_;
  NNACL_CHECK_NULL_RETURN_NULL(conv_param);

#ifdef ENABLE_ARM64
  if (conv_param->kernel_h_ == 1 && conv_param->kernel_w_ == 1) {
    ConvolutionBaseStruct *conv1x1 = CreateConvolution1x1(conv_param);
    return conv1x1;
  }
#endif

#if defined(ENABLE_ARM64) || defined(ENABLE_AVX)
  ConvolutionBaseStruct *conv_im2col = CreateConvolutionIm2Col(&convolution_delegate->conv_.base_, conv_param);
  return conv_im2col;
#endif

  return NULL;
}

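/* Kernel selection for NHWC output. Preference order: Winograd when the shape
 * allows it, then the platform slide-window variants (AVX SW 1x1, AVX SW,
 * arm64 SW), and finally conv1x1 or im2col as the fallback. */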
ConvolutionBaseStruct *ConvolutionDelegateConvNHWCKernelSelect(ConvolutionDelegateStruct *convolution_delegate) {
  ConvParameter *conv_param = (ConvParameter *)convolution_delegate->conv_.base_.param_;
  NNACL_CHECK_NULL_RETURN_NULL(conv_param);

  ConvolutionBaseStruct *conv = NULL;

  int out_unit;
  if (CheckIfUseWinograd(&out_unit, conv_param)) {
    conv = CreateConvolutionWinograd(conv_param, out_unit);
  }

#ifdef ENABLE_AVX
  if (conv == NULL && CheckAvxUseSW1x1Conv(conv_param)) {
    conv = CreateConvolutionSW1x1(conv_param, convolution_delegate->input_const_, convolution_delegate->weight_const_);
  }

  if (conv == NULL && CheckAvxUseSWConv(conv_param, convolution_delegate->conv_.base_.thread_nr_)) {
    conv = CreateConvolutionSWAVX(conv_param);
  }
#endif

#ifdef ENABLE_ARM64
  if (conv == NULL && CheckArm64UseSWConv(conv_param)) {
    conv = CreateConvolutionSWARM64(conv_param);
  }
#endif

  if (conv == NULL) {
    if (conv_param->kernel_h_ == 1 && conv_param->kernel_w_ == 1) {
      conv = CreateConvolution1x1(conv_param);
    } else {
      conv = CreateConvolutionIm2Col(&convolution_delegate->conv_.base_, conv_param);
    }
  }
  return conv;
}

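/* Pick the concrete convolution kernel by output format, then mirror the
 * delegate's KernelBase fields, weight-sharing state, and origin weight/bias
 * onto the selected kernel so it can run in the delegate's place. */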
ConvolutionBaseStruct *ConvolutionDelegateConvolutionSelect(ConvolutionDelegateStruct *convolution_delegate) {
  ConvolutionBaseStruct *conv;
  if (convolution_delegate->conv_.base_.out_[OUTPUT_INDEX]->format_ == Format_NC4HW4) {
    conv = ConvolutionDelegateConvNC4KernelSelect(convolution_delegate);
  } else {
    conv = ConvolutionDelegateConvNHWCKernelSelect(convolution_delegate);
  }
  if (conv == NULL) {
    return NULL;
  }

  conv->base_.InferShape = convolution_delegate->conv_.base_.InferShape;
  conv->base_.UpdateThread = convolution_delegate->conv_.base_.UpdateThread;
  conv->base_.env_ = convolution_delegate->conv_.base_.env_;
  conv->base_.param_ = convolution_delegate->conv_.base_.param_;
  conv->base_.thread_nr_ = convolution_delegate->conv_.base_.thread_nr_;
  conv->base_.train_session_ = convolution_delegate->conv_.base_.train_session_;
  conv->base_.in_ = convolution_delegate->conv_.base_.in_;
  conv->base_.in_size_ = convolution_delegate->conv_.base_.in_size_;
  conv->base_.out_ = convolution_delegate->conv_.base_.out_;
  conv->base_.out_size_ = convolution_delegate->conv_.base_.out_size_;

  conv->infershape_done_ = convolution_delegate->conv_.infershape_done_;
  conv->shaing_manager_ = convolution_delegate->conv_.shaing_manager_;
  conv->get_sharing_weight_ = convolution_delegate->conv_.get_sharing_weight_;
  conv->free_sharing_weight_ = convolution_delegate->conv_.free_sharing_weight_;
  conv->is_sharing_pack_ = convolution_delegate->conv_.is_sharing_pack_;

  conv->origin_weight_ = convolution_delegate->origin_weight_;
  conv->origin_bias_ = convolution_delegate->origin_bias_;
  return conv;
}

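/* Release the weight/bias copies made before infer-shape, once they are no
 * longer needed. Borrowed pointers are only cleared, never freed. */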
void ConvolutionDelegateFreeCopiedData(ConvolutionDelegateStruct *convolution_delegate) {
  if (convolution_delegate->origin_weight_ != NULL && convolution_delegate->need_free_weight_) {
    free(convolution_delegate->origin_weight_);
  }
  convolution_delegate->origin_weight_ = NULL;
  convolution_delegate->conv_.origin_weight_ = NULL;
  convolution_delegate->need_free_weight_ = false;

  if (convolution_delegate->origin_bias_ != NULL && convolution_delegate->need_free_bias_) {
    free(convolution_delegate->origin_bias_);
  }
  convolution_delegate->origin_bias_ = NULL;
  convolution_delegate->conv_.origin_bias_ = NULL;
  convolution_delegate->need_free_bias_ = false;
}

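/* The first Resize selects and prepares the concrete kernel (the choice needs
 * shapes that are only known now); later calls just resize it. The copied
 * weight/bias are freed afterwards, once the kernel has consumed them. */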
int ConvolutionDelegateResize(struct KernelBase *self) {
  ConvolutionDelegateStruct *convolution_delegate = (ConvolutionDelegateStruct *)self;
  NNACL_CHECK_NULL_RETURN_ERR(convolution_delegate);

  if (convolution_delegate->convolution_ == NULL) {
    convolution_delegate->convolution_ = ConvolutionDelegateConvolutionSelect(convolution_delegate);
    NNACL_MALLOC_CHECK_NULL_RETURN_ERR(convolution_delegate->convolution_);
    (void)ConvBaseUpdateComputeInfo(convolution_delegate->convolution_);
    int ret = convolution_delegate->convolution_->base_.Prepare(&convolution_delegate->convolution_->base_);
    if (ret != NNACL_OK) {
      return ret;
    }
  }

  (void)ConvBaseUpdateComputeInfo(convolution_delegate->convolution_);
  int ret = convolution_delegate->convolution_->base_.Resize(&convolution_delegate->convolution_->base_);
  if (ret != NNACL_OK) {
    return ret;
  }

  ConvolutionDelegateFreeCopiedData(convolution_delegate);
  return NNACL_OK;
}

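/* Validate tensor counts and data types (weight must be fp32 or fp16, bias
 * fp32), record whether the input and weight are constant, then stash the
 * origin weight and bias. Kernel selection is deferred to Resize(). */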
int ConvolutionDelegatePrepare(struct KernelBase *self) {
  ConvolutionDelegateStruct *convolution_delegate = (ConvolutionDelegateStruct *)self;
  NNACL_CHECK_NULL_RETURN_ERR(convolution_delegate);

  NNACL_CHECK_FALSE(self->in_size_ < TWO_TENSOR, NNACL_INPUT_TENSOR_ERROR);
  NNACL_CHECK_FALSE(self->out_size_ < ONE_TENSOR, NNACL_OUTPUT_TENSOR_ERROR);
  NNACL_CHECK_NULL_RETURN_ERR(self->in_[SECOND_INPUT]);

  NNACL_CHECK_FALSE(self->in_[SECOND_INPUT]->data_type_ != kNumberTypeFloat32 &&
                      self->in_[SECOND_INPUT]->data_type_ != kNumberTypeFloat16,
                    NNACL_CONVOLUTION_WEIGHT_DATATYPE_INVALID);
  NNACL_CHECK_FALSE(self->in_size_ == THREE_TENSOR && self->in_[THIRD_INPUT] != NULL &&
                      self->in_[THIRD_INPUT]->data_type_ != kNumberTypeFloat32,
                    NNACL_CONVOLUTION_BIAS_DATATYPE_INVALID);

  convolution_delegate->input_const_ = IsConst(self->in_[FIRST_INPUT]) && !self->train_session_;
  convolution_delegate->weight_const_ = IsConst(self->in_[SECOND_INPUT]) && !self->train_session_;

  return ConvolutionDelegateGetWeightAndBias(convolution_delegate);
}

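/* Release and free the concrete kernel, if one was ever selected. */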
int ConvolutionDelegateRelease(struct KernelBase *self) {
  ConvolutionDelegateStruct *convolution_delegate = (ConvolutionDelegateStruct *)self;
  NNACL_CHECK_NULL_RETURN_ERR(convolution_delegate);
  if (convolution_delegate->convolution_ != NULL) {
    (void)convolution_delegate->convolution_->base_.Release(&convolution_delegate->convolution_->base_);
    free(convolution_delegate->convolution_);
    convolution_delegate->convolution_ = NULL;
  }
  return NNACL_OK;
}

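/* Forward Compute to the concrete kernel, passing along the workspace that
 * the framework allocated for the delegate. */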
int ConvolutionDelegateCompute(struct KernelBase *self) {
  ConvolutionDelegateStruct *convolution_delegate = (ConvolutionDelegateStruct *)self;
  NNACL_CHECK_NULL_RETURN_ERR(convolution_delegate);
  NNACL_CHECK_NULL_RETURN_ERR(convolution_delegate->convolution_);

  convolution_delegate->convolution_->base_.workspace_ = convolution_delegate->conv_.base_.workspace_;
  return convolution_delegate->convolution_->base_.Compute(&convolution_delegate->convolution_->base_);
}

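/* Build the delegate kernel: a zero-initialized ConvolutionDelegateStruct
 * whose KernelBase entry points all route through the delegate. */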
KernelBase *CreateConvlutionDelegate(ConvParameter *conv_param) {
  ConvolutionDelegateStruct *delegate = (ConvolutionDelegateStruct *)malloc(sizeof(ConvolutionDelegateStruct));
  NNACL_MALLOC_CHECK_NULL_RETURN_NULL(delegate);
  memset(delegate, 0, sizeof(ConvolutionDelegateStruct));
  delegate->conv_.base_.Prepare = ConvolutionDelegatePrepare;
  delegate->conv_.base_.Resize = ConvolutionDelegateResize;
  delegate->conv_.base_.Release = ConvolutionDelegateRelease;
  delegate->conv_.base_.Compute = ConvolutionDelegateCompute;
  return (KernelBase *)delegate;
}

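/* Depthwise kernel selection, most specific first: generic ConvDw for dynamic
 * shapes, AVX slide-window, 3x3 1D-Winograd (ARM/SSE), arm64 indirect-buffer,
 * slide-window for narrow channels, and generic ConvDw as the fallback. */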
KernelBase *CreateConvolutionDepthwise(ConvParameter *conv_param) {
  NNACL_CHECK_NULL_RETURN_NULL(conv_param);
  KernelBase *kernel = NULL;

  if (conv_param->dynamic_shape_) {
    kernel = CreateConvDw(conv_param);
    if (kernel != NULL) {
      return kernel;
    }
  }

#ifdef ENABLE_AVX
  kernel = CreateConvDwSWAVX(conv_param);
  if (kernel != NULL) {
    return kernel;
  }
#endif

#if defined(ENABLE_ARM) || (defined(ENABLE_SSE) && !defined(ENABLE_AVX))
  if (CheckConvDw1DWinograd(conv_param, conv_param->thread_num_)) {
    kernel = CreateConvDw3x3(conv_param);
    if (kernel != NULL) {
      return kernel;
    }
  }
#endif

#ifdef ENABLE_ARM64
  if (CheckConvDwUseIndirectBuffer(conv_param)) {
    kernel = CreateConvDwIndirect(conv_param);
    if (kernel != NULL) {
      return kernel;
    }
  }
#endif

  if (conv_param->input_channel_ < MaxDwConvSWSize) {
    kernel = CreateConvDwSW(conv_param);
    if (kernel != NULL) {
      return kernel;
    }
  }

  kernel = CreateConvDw(conv_param);
  return kernel;
}

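/* Entry point for Conv2DFusion: group == 1 is ordinary convolution (via the
 * delegate), group == input channels == output channels is depthwise, and
 * anything else is grouped convolution. */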
KernelBase *CreateConv2DFusion(OpParameter *param, int data_type) {
  ConvParameter *conv_param = (ConvParameter *)param;
  conv_param->thread_num_ = param->thread_num_;
  KernelBase *kernel;
  if (conv_param->group_ == 1) {
    kernel = CreateConvlutionDelegate(conv_param);
  } else if (conv_param->group_ == conv_param->input_channel_ && conv_param->group_ == conv_param->output_channel_) {
    kernel = CreateConvolutionDepthwise(conv_param);
  } else {
    kernel = CreateGroupConvolution(conv_param, data_type);
  }

  if (kernel == NULL) {
    return NULL;
  }

  ConvolutionBaseStruct *conv = (ConvolutionBaseStruct *)kernel;
  (void)ConvBaseUpdateParamInfo(&conv->compute_, conv_param);

  return kernel;
}

REG_KERNEL_CREATOR(PrimType_Conv2DFusion, kNumberTypeFloat32, CreateConv2DFusion)