• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2023 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
 12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "nnacl/kernel/convolution_winograd_base.h"
18 #include "nnacl/base/minimal_filtering_generator.h"
19 #include "nnacl/fp32/winograd_transform.h"
20 #include "nnacl/fp32/conv_winograd_fp32.h"
21 
// Allocates the packed (Winograd-transformed) weight buffer and the padded
// bias buffer for a winograd convolution kernel, and derives the G/Gt weight
// transform matrices via the Cook-Toom construction.
int ConvWinoBaseMallocWeightBiasData(ConvolutionBaseStruct *conv) {
  ConvolutionWinogradBaseStruct *wg = (ConvolutionWinogradBaseStruct *)conv;
  NNACL_MALLOC_CHECK_NULL_RETURN_ERR(wg);

  // Transformed-weight layout: input_unit^2 x in_c x out_c rounded up to the oc block.
  size_t weight_buf_size = wg->input_unit_ * wg->input_unit_ * conv->compute_.in_c_ *
                           UP_ROUND(conv->compute_.out_c_, wg->oc_block_) * sizeof(float);
  // Training sessions get the packed weight from the shared workspace instead.
  if (!conv->base_.train_session_ && conv->packed_weight_ == NULL) {
    NNACL_CHECK_MALLOC_SIZE(weight_buf_size);
    conv->packed_weight_ = ConvBaseGetConvPackWeightData(conv, weight_buf_size);
    NNACL_MALLOC_CHECK_NULL_RETURN_ERR(conv->packed_weight_);
  }

  // A/At/B/Bt are produced by the generator but only G/Gt are retained here.
  float mat_a[CONVOLUTION_WINOGRAD_MATRIX_SIZE];
  float mat_at[CONVOLUTION_WINOGRAD_MATRIX_SIZE];
  float mat_b[CONVOLUTION_WINOGRAD_MATRIX_SIZE];
  float mat_bt[CONVOLUTION_WINOGRAD_MATRIX_SIZE];
  // The larger input-unit configuration uses a 0.5 generator coefficient.
  float coef = (wg->input_unit_ == CONVOLUTION_WINOGRAD_INPUT_UNIT_SIZE) ? 0.5f : 1.0f;
  int ret = CookToomFilter(mat_a, mat_at, mat_b, mat_bt, wg->matrix_g_, wg->matrix_gt_, coef, wg->output_unit_,
                           wg->kernel_unit_);
  if (ret != NNACL_OK) {
    return ret;
  }

  // Bias is padded to a multiple of C4NUM and zero-filled so padded lanes are inert.
  size_t bias_size = UP_ROUND(conv->compute_.out_c_, C4NUM) * sizeof(float);
  if (conv->bias_data_ == NULL) {
    NNACL_CHECK_MALLOC_SIZE(bias_size);
    conv->bias_data_ = conv->base_.env_->Alloc(conv->base_.env_->allocator_, bias_size);
    NNACL_MALLOC_CHECK_NULL_RETURN_ERR(conv->bias_data_);
  }
  memset(conv->bias_data_, 0, bias_size);
  return NNACL_OK;
}
61 
// Releases every scratch buffer owned by the winograd kernel and clears the
// stored pointers so a repeated call is a harmless no-op.
void ConvWinoBaseFreeTmpBuffer(ConvolutionWinogradBaseStruct *winograd) {
  ExecEnv *env = winograd->conv_.base_.env_;
  NNACL_CHECK_NULL_RETURN_VOID(env);

  if (winograd->trans_input_ != NULL) {
    env->Free(env->allocator_, winograd->trans_input_);
  }
  winograd->trans_input_ = NULL;

  if (winograd->tmp_data_ != NULL) {
    env->Free(env->allocator_, winograd->tmp_data_);
  }
  winograd->tmp_data_ = NULL;

  if (winograd->gemm_out_ != NULL) {
    env->Free(env->allocator_, winograd->gemm_out_);
  }
  winograd->gemm_out_ = NULL;

  if (winograd->col_buffer_ != NULL) {
    env->Free(env->allocator_, winograd->col_buffer_);
  }
  winograd->col_buffer_ = NULL;

  if (winograd->opt_input_trans_ != NULL) {
    env->Free(env->allocator_, winograd->opt_input_trans_);
  }
  winograd->opt_input_trans_ = NULL;
}
87 
// Sets the default tiling parameters for the portable (base) winograd
// implementation; architecture-specific kernels override this hook.
void ConvWinoBaseInitGlobalVariable(ConvolutionBaseStruct *conv) {
  ConvolutionWinogradBaseStruct *wg = (ConvolutionWinogradBaseStruct *)conv;
  wg->oc_block_ = C8NUM;       // output-channel block width
  wg->tmp_data_tile_ = C4NUM;  // channel tile of the per-thread scratch buffer
  wg->tile_num_ = C12NUM;      // output tiles processed per GEMM step
}
94 
// Transforms the spatial-domain weights into the winograd domain (via G/Gt)
// and packs the result into conv_.packed_weight_.
int ConvWinoBaseWinogradFilterTransform(ConvolutionWinogradBaseStruct *winograd, const float *weight_data) {
  NNACL_CHECK_ZERO_RETURN_ERR(winograd->oc_block_);
  float *packed = (float *)winograd->conv_.packed_weight_;
  return WinogradWeightTransform(weight_data, packed, winograd->matrix_g_, winograd->matrix_gt_, winograd->oc_block_,
                                 winograd->input_unit_, winograd->kernel_unit_, winograd->conv_.compute_.in_c_,
                                 winograd->conv_.compute_.out_c_, true);
}
102 
// Packs (winograd-transforms) the weight tensor into the kernel's packed
// weight buffer. Training sessions read the live weight tensor; inference
// uses the cached origin_weight_.
void ConvWinoBasePackWeight(ConvolutionBaseStruct *conv) {
  ConvolutionWinogradBaseStruct *wg = (ConvolutionWinogradBaseStruct *)conv;
  NNACL_CHECK_NULL_RETURN_VOID(wg);
  TensorC *weight_tensor = conv->base_.in_[SECOND_INPUT];
  NNACL_CHECK_NULL_RETURN_VOID(weight_tensor);
  void *src = conv->base_.train_session_ ? weight_tensor->data_ : conv->origin_weight_;
  NNACL_CHECK_NULL_RETURN_VOID(src);
  // NOTE(review): the transform's return code is discarded because this hook
  // returns void; a failure surfaces later when the packed weight is consumed.
  (void)ConvWinoBaseWinogradFilterTransform(wg, (float *)src);
}
112 
// Validates the tensor counts, derives the winograd tile geometry
// (kernel_unit_/input_unit_) and initializes the packed weight and bias.
// For training sessions it also publishes the packed-weight workspace size.
// Returns NNACL_OK on success or an NNACL error code.
int ConvolutionWinogradBasePrepare(KernelBase *self) {
  NNACL_CHECK_FALSE(self->in_size_ < TWO_TENSOR, NNACL_INPUT_TENSOR_ERROR);
  NNACL_CHECK_FALSE(self->out_size_ < ONE_TENSOR, NNACL_OUTPUT_TENSOR_ERROR);

  ConvolutionWinogradBaseStruct *winograd = (ConvolutionWinogradBaseStruct *)self;
  NNACL_CHECK_NULL_RETURN_ERR(winograd);

  winograd->conv_.init_global_variable_(&winograd->conv_);

  // F(output_unit, kernel_unit): input tile edge = output_unit + kernel_unit - 1.
  winograd->kernel_unit_ = winograd->conv_.compute_.kernel_h_;
  winograd->input_unit_ = winograd->output_unit_ + winograd->kernel_unit_ - 1;

  if (self->train_session_) {
    TensorC *filter_tensor = self->in_[SECOND_INPUT];
    NNACL_CHECK_NULL_RETURN_ERR(filter_tensor);
    NNACL_CHECK_FALSE(filter_tensor->shape_size_ != DIMENSION_4D, NNACL_CONVOLUTION_WEIGHT_SHAPE_INVALID);

    int input_plane = winograd->input_unit_ * winograd->input_unit_;
    NNACL_CHECK_INT_MUL_NOT_OVERFLOW(input_plane, winograd->conv_.compute_.in_c_, NNACL_ERR);
    int in_chw = input_plane * winograd->conv_.compute_.in_c_;
    NNACL_CHECK_INT_MUL_NOT_OVERFLOW(in_chw, UP_ROUND(winograd->conv_.compute_.out_c_, winograd->oc_block_), NNACL_ERR);
    // Use size_t (not int) so the final multiply by sizeof(float) cannot
    // overflow; matches the size computed in ConvWinoBaseMallocWeightBiasData.
    size_t trans_matrix_data_size =
      (size_t)in_chw * UP_ROUND(winograd->conv_.compute_.out_c_, winograd->oc_block_) * sizeof(float);
    self->work_size_ = trans_matrix_data_size;
  }

  return ConvBaseInitConvWeightBias(&winograd->conv_);
}
141 
// Chooses the parallelization strategy: cut by batch when the batch count
// divides evenly across threads, otherwise cap the thread count by the
// amount of output-tile work. Returns NNACL_OK, or NNACL_ERR on a zero
// thread count.
int ConvoWinoBaseUpdateThreadNumProcess(ConvolutionWinogradBaseStruct *winograd) {
  // Guard the modulo below: a zero thread count would be undefined behavior.
  NNACL_CHECK_ZERO_RETURN_ERR(winograd->conv_.base_.thread_nr_);
  if (winograd->conv_.compute_.in_n_ % winograd->conv_.base_.thread_nr_ == 0) {
    winograd->conv_.use_batch_cut_flag_ = true;
    return NNACL_OK;
  }
  winograd->conv_.use_batch_cut_flag_ = false;

  // Don't spawn more threads than there are tile blocks of work, but never
  // let the thread count collapse to zero (possible when out_hw_ == 0).
  int update_thread = UP_DIV(UP_DIV(winograd->conv_.compute_.out_hw_, C12NUM), ConvMinBlock);
  winograd->conv_.base_.thread_nr_ = NNACL_MAX(1, NNACL_MIN(update_thread, winograd->conv_.base_.thread_nr_));
  return NNACL_OK;
}
154 
// Updates the kernel's thread count, either via the dynamic work-based
// distribution or via the thread pool's generic UpdateThread heuristic.
int ConvoWinoBaseUpdateThread(ConvolutionWinogradBaseStruct *winograd) {
#ifdef DYNAMIC_THREAD_DISTRIBUTE
  // Propagate the distribution result instead of silently discarding it.
  return ConvoWinoBaseUpdateThreadNumProcess(winograd);
#else
  KernelBase *base = &winograd->conv_.base_;
  base->thread_nr_ = base->UpdateThread(TC_PTYPE(PrimType_Conv2DFusion), 0, 0, 0, base->thread_nr_);
  return NNACL_OK;
#endif
}
164 
// Resolves the input- and output-tile transform functions for the configured
// winograd unit sizes; fails when no transform exists for this configuration.
int ConvWinoBaseConfigInputOutput(ConvolutionWinogradBaseStruct *winograd) {
  ConvParameter *conv_param = (ConvParameter *)winograd->conv_.base_.param_;

  winograd->transfer_functions_.in_func_ = GetInputTransFunc(winograd->input_unit_);
  NNACL_CHECK_NULL_RETURN_ERR(winograd->transfer_functions_.in_func_);

  // The output transform also folds in the fused activation type.
  winograd->transfer_functions_.out_func_ =
    GetOutputTransFunc(winograd->input_unit_, winograd->output_unit_, conv_param->act_type_);
  NNACL_CHECK_NULL_RETURN_ERR(winograd->transfer_functions_.out_func_);

  return NNACL_OK;
}
176 
// Allocates the per-run scratch buffers (transformed input tiles, GEMM
// output, per-thread tile scratch, column buffer, packed input transform)
// and records them in tmp_buffer_address_list_ for the compute kernels.
// On failure the caller is expected to invoke ConvWinoBaseFreeTmpBuffer.
int ConvoWinoBaseInitTmpBuffer(ConvolutionWinogradBaseStruct *winograd) {
  ExecEnv *env = winograd->conv_.base_.env_;
  NNACL_CHECK_NULL_RETURN_ERR(env);

  int thread_input_plane = winograd->conv_.base_.thread_nr_ * winograd->input_unit_ * winograd->input_unit_;
  NNACL_CHECK_INT_MUL_NOT_OVERFLOW(winograd->tile_num_, thread_input_plane, NNACL_ERR);
  int total_thread_input_plane = winograd->tile_num_ * thread_input_plane;

  // Winograd-transformed input tiles for all threads.
  NNACL_CHECK_INT_MUL_NOT_OVERFLOW(total_thread_input_plane, winograd->conv_.compute_.in_c_, NNACL_ERR);
  size_t tile_buffer_size = (size_t)total_thread_input_plane * winograd->conv_.compute_.in_c_ * sizeof(float);
  winograd->trans_input_ = (float *)env->Alloc(env->allocator_, tile_buffer_size);
  NNACL_MALLOC_CHECK_NULL_RETURN_ERR(winograd->trans_input_);

  // GEMM result buffer; output channels rounded up to the C8NUM block.
  int oc8 = UP_ROUND(winograd->conv_.compute_.out_c_, C8NUM);
  NNACL_CHECK_INT_MUL_NOT_OVERFLOW(total_thread_input_plane, oc8, NNACL_ERR);
  winograd->gemm_out_ = env->Alloc(env->allocator_, (size_t)total_thread_input_plane * oc8 * sizeof(float));
  NNACL_MALLOC_CHECK_NULL_RETURN_ERR(winograd->gemm_out_);

  // Per-thread scratch used during the input tile transform.
  NNACL_CHECK_INT_MUL_NOT_OVERFLOW(winograd->tmp_data_tile_, thread_input_plane, NNACL_ERR);
  winograd->tmp_data_ = env->Alloc(env->allocator_, (size_t)winograd->tmp_data_tile_ * thread_input_plane * sizeof(float));
  NNACL_MALLOC_CHECK_NULL_RETURN_ERR(winograd->tmp_data_);

  // Column buffer for the tiled matmul.
  NNACL_CHECK_INT_MUL_NOT_OVERFLOW(winograd->conv_.base_.thread_nr_, winograd->tile_num_, NNACL_ERR);
  NNACL_CHECK_INT_MUL_NOT_OVERFLOW(winograd->conv_.base_.thread_nr_ * winograd->tile_num_,
                                   winograd->conv_.compute_.in_c_, NNACL_ERR);
  winograd->col_buffer_ = env->Alloc(env->allocator_, (size_t)winograd->conv_.base_.thread_nr_ *
                                                        winograd->tile_num_ * winograd->conv_.compute_.in_c_ *
                                                        sizeof(float));
  NNACL_MALLOC_CHECK_NULL_RETURN_ERR(winograd->col_buffer_);

  // Packed input transform, channels rounded up to the tmp-data tile width.
  int tile = UP_ROUND(winograd->conv_.compute_.in_c_, winograd->tmp_data_tile_);
  NNACL_CHECK_INT_MUL_NOT_OVERFLOW(total_thread_input_plane, tile, NNACL_ERR);
  winograd->opt_input_trans_ = env->Alloc(env->allocator_, (size_t)total_thread_input_plane * tile * sizeof(float));
  NNACL_MALLOC_CHECK_NULL_RETURN_ERR(winograd->opt_input_trans_);

  winograd->tmp_buffer_address_list_[Index0] = winograd->trans_input_;
  winograd->tmp_buffer_address_list_[Index1] = winograd->gemm_out_;
  winograd->tmp_buffer_address_list_[Index2] = winograd->tmp_data_;
  winograd->tmp_buffer_address_list_[Index3] = winograd->col_buffer_;
  winograd->tmp_buffer_address_list_[Index4] = winograd->opt_input_trans_;
  return NNACL_OK;
}
211 
// Executes one parallel slice of the winograd convolution: validates the
// input/output tensors, then dispatches either the batch-cut or the
// tile-cut fp32 winograd kernel for this task id.
int ConvWinoBaseRunImpl(ConvolutionBaseStruct *conv, int task_id) {
  ConvolutionWinogradBaseStruct *wg = (ConvolutionWinogradBaseStruct *)conv;
  NNACL_CHECK_NULL_RETURN_ERR(wg);
  ConvParameter *param = (ConvParameter *)conv->base_.param_;
  NNACL_CHECK_NULL_RETURN_ERR(param);

  TensorC *in_tensor = conv->base_.in_[FIRST_INPUT];
  NNACL_CHECK_NULL_RETURN_ERR(in_tensor);
  float *in_data = (float *)in_tensor->data_;
  NNACL_CHECK_NULL_RETURN_ERR(in_data);

  TensorC *out_tensor = conv->base_.out_[OUTPUT_INDEX];
  NNACL_CHECK_NULL_RETURN_ERR(out_tensor);
  float *out_data = (float *)out_tensor->data_;
  NNACL_CHECK_NULL_RETURN_ERR(out_data);

  float *weight = (float *)conv->packed_weight_;
  float *bias = (float *)conv->bias_data_;
  if (conv->use_batch_cut_flag_) {
    ConvWinogardFp32CutByBatch(in_data, weight, bias, out_data, wg->tmp_buffer_address_list_, task_id, param,
                               wg->transfer_functions_);
  } else {
    ConvWinogardFp32(in_data, weight, bias, out_data, wg->tmp_buffer_address_list_, task_id, param,
                     wg->transfer_functions_);
  }

  return NNACL_OK;
}
238 
ConvWinoImpl(void * cdata,int task_id,float l,float r)239 int ConvWinoImpl(void *cdata, int task_id, float l, float r) {
240   ConvolutionBaseStruct *conv = (ConvolutionBaseStruct *)cdata;
241   NNACL_CHECK_NULL_RETURN_ERR(conv);
242   return conv->run_impl_(conv, task_id);
243 }
244 
// Mirrors the kernel's tile geometry into the ConvParameter consumed by the
// fp32 winograd compute routines.
void ConvWinoBaseUpdateParam(ConvParameter *param, ConvolutionWinogradBaseStruct *winograd) {
  param->output_unit_ = winograd->output_unit_;
  param->input_unit_ = winograd->input_unit_;
}
249 
// Re-derives the runtime configuration after an input-shape change:
// validity check, base prepare, thread-count update, transform-function
// selection, and finally syncs the tile geometry into the ConvParameter.
int ConvolutionWinogradBaseResize(KernelBase *self) {
  ConvolutionWinogradBaseStruct *winograd = (ConvolutionWinogradBaseStruct *)self;
  NNACL_CHECK_NULL_RETURN_ERR(winograd);

  // Each step runs only if every previous step succeeded.
  int ret = ConvBaseCheckResizeValid(&winograd->conv_);
  if (ret == NNACL_OK) {
    ret = ConvBasePrepare(&winograd->conv_);
  }
  if (ret == NNACL_OK) {
    ret = ConvoWinoBaseUpdateThread(winograd);
  }
  if (ret == NNACL_OK) {
    ret = winograd->config_input_output_(winograd);
  }
  if (ret != NNACL_OK) {
    return ret;
  }

  ConvWinoBaseUpdateParam((ConvParameter *)self->param_, winograd);
  return NNACL_OK;
}
277 
// Runs one inference pass: allocates the scratch buffers, repacks weights if
// needed, launches the parallel winograd kernel, and always releases the
// scratch buffers before returning.
int ConvolutionWinogradBaseCompute(KernelBase *self) {
  ConvolutionWinogradBaseStruct *winograd = (ConvolutionWinogradBaseStruct *)self;
  NNACL_CHECK_NULL_RETURN_ERR(winograd);

  int ret = ConvoWinoBaseInitTmpBuffer(winograd);
  if (ret == NNACL_OK) {
    ret = ConvBaseRepackWeight(&winograd->conv_);
  }
  if (ret == NNACL_OK) {
    ret = self->env_->ParallelLaunch(self->env_->thread_pool_, ConvWinoImpl, self, self->thread_nr_);
  }

  // Scratch buffers are released on every path, success or failure.
  ConvWinoBaseFreeTmpBuffer(winograd);
  return ret;
}
298 
// Releases the resources owned by the base convolution (packed weight, bias).
int ConvolutionWinogradBaseRelease(KernelBase *self) {
  ConvolutionBaseStruct *base_conv = (ConvolutionBaseStruct *)self;
  NNACL_CHECK_NULL_RETURN_ERR(base_conv);
  ConvBaseRelease(base_conv);
  return NNACL_OK;
}
305 
CreateConvWinogradBase(ConvParameter * conv_param)306 ConvolutionWinogradBaseStruct *CreateConvWinogradBase(ConvParameter *conv_param) {
307   ConvolutionWinogradBaseStruct *winograd =
308     (ConvolutionWinogradBaseStruct *)malloc(sizeof(ConvolutionWinogradBaseStruct));
309   NNACL_MALLOC_CHECK_NULL_RETURN_NULL(winograd);
310   memset(winograd, 0, sizeof(ConvolutionWinogradBaseStruct));
311 
312   winograd->config_input_output_ = ConvWinoBaseConfigInputOutput;
313   winograd->conv_.init_global_variable_ = ConvWinoBaseInitGlobalVariable;
314 
315   winograd->conv_.base_.Prepare = ConvolutionWinogradBasePrepare;
316   winograd->conv_.base_.Resize = ConvolutionWinogradBaseResize;
317   winograd->conv_.base_.Release = ConvolutionWinogradBaseRelease;
318   winograd->conv_.base_.Compute = ConvolutionWinogradBaseCompute;
319   return (ConvolutionWinogradBaseStruct *)winograd;
320 }
321