/**
 * Copyright 2023 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "nnacl/kernel/convolution_winograd_base.h"
#include "nnacl/base/minimal_filtering_generator.h"
#include "nnacl/fp32/winograd_transform.h"
#include "nnacl/fp32/conv_winograd_fp32.h"

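// Allocates the packed (Winograd-domain) weight buffer and a zero-initialized bias buffer,
// and derives the G/Gt transform matrices via the Cook-Toom filter generator.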
int ConvWinoBaseMallocWeightBiasData(ConvolutionBaseStruct *conv) {
  ConvolutionWinogradBaseStruct *winograd = (ConvolutionWinogradBaseStruct *)conv;
  NNACL_MALLOC_CHECK_NULL_RETURN_ERR(winograd);

  // set data
  size_t trans_matrix_data_size = winograd->input_unit_ * winograd->input_unit_ * conv->compute_.in_c_ *
                                  UP_ROUND(conv->compute_.out_c_, winograd->oc_block_) * sizeof(float);
  if (!conv->base_.train_session_) {
    if (conv->packed_weight_ == NULL) {
      NNACL_CHECK_MALLOC_SIZE(trans_matrix_data_size);
      conv->packed_weight_ = ConvBaseGetConvPackWeightData(conv, trans_matrix_data_size);
      NNACL_MALLOC_CHECK_NULL_RETURN_ERR(conv->packed_weight_);
    }
  }

  float matrix_a[CONVOLUTION_WINOGRAD_MATRIX_SIZE];
  float matrix_at[CONVOLUTION_WINOGRAD_MATRIX_SIZE];
  float matrix_b[CONVOLUTION_WINOGRAD_MATRIX_SIZE];
  float matrix_bt[CONVOLUTION_WINOGRAD_MATRIX_SIZE];
  float coef = 1.0f;
  if (winograd->input_unit_ == CONVOLUTION_WINOGRAD_INPUT_UNIT_SIZE) {
    coef = 0.5f;
  }
  int ret = CookToomFilter(matrix_a, matrix_at, matrix_b, matrix_bt, winograd->matrix_g_, winograd->matrix_gt_, coef,
                           winograd->output_unit_, winograd->kernel_unit_);
  if (ret != NNACL_OK) {
    return ret;
  }

  // init bias
  size_t new_bias_size = UP_ROUND(conv->compute_.out_c_, C4NUM) * sizeof(float);
  if (conv->bias_data_ == NULL) {
    NNACL_CHECK_MALLOC_SIZE(new_bias_size);
    conv->bias_data_ = conv->base_.env_->Alloc(conv->base_.env_->allocator_, new_bias_size);
    NNACL_MALLOC_CHECK_NULL_RETURN_ERR(conv->bias_data_);
  }
  memset(conv->bias_data_, 0, new_bias_size);
  return NNACL_OK;
}

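// Releases all per-run temporary buffers allocated by ConvoWinoBaseInitTmpBuffer().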
void ConvWinoBaseFreeTmpBuffer(ConvolutionWinogradBaseStruct *winograd) {
  ExecEnv *env = winograd->conv_.base_.env_;
  NNACL_CHECK_NULL_RETURN_VOID(env);

  if (winograd->trans_input_ != NULL) {
    env->Free(env->allocator_, winograd->trans_input_);
    winograd->trans_input_ = NULL;
  }
  if (winograd->tmp_data_ != NULL) {
    env->Free(env->allocator_, winograd->tmp_data_);
    winograd->tmp_data_ = NULL;
  }
  if (winograd->gemm_out_ != NULL) {
    env->Free(env->allocator_, winograd->gemm_out_);
    winograd->gemm_out_ = NULL;
  }
  if (winograd->col_buffer_ != NULL) {
    env->Free(env->allocator_, winograd->col_buffer_);
    winograd->col_buffer_ = NULL;
  }
  if (winograd->opt_input_trans_ != NULL) {
    env->Free(env->allocator_, winograd->opt_input_trans_);
    winograd->opt_input_trans_ = NULL;
  }
}

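// Default blocking/tiling parameters for the generic (non-SIMD-specialized) Winograd kernel.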
void ConvWinoBaseInitGlobalVariable(ConvolutionBaseStruct *conv) {
  ConvolutionWinogradBaseStruct *winograd = (ConvolutionWinogradBaseStruct *)conv;
  winograd->oc_block_ = C8NUM;
  winograd->tmp_data_tile_ = C4NUM;
  winograd->tile_num_ = C12NUM;
}

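// Transforms the original spatial filter into the Winograd domain and packs it into packed_weight_.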
int ConvWinoBaseWinogradFilterTransform(ConvolutionWinogradBaseStruct *winograd, const float *weight_data) {
  NNACL_CHECK_ZERO_RETURN_ERR(winograd->oc_block_);
  return WinogradWeightTransform(weight_data, (float *)winograd->conv_.packed_weight_, winograd->matrix_g_,
                                 winograd->matrix_gt_, winograd->oc_block_, winograd->input_unit_,
                                 winograd->kernel_unit_, winograd->conv_.compute_.in_c_,
                                 winograd->conv_.compute_.out_c_, true);
}

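// Picks the weight source (graph weight tensor for training sessions, cached origin_weight_ otherwise)
// and runs the Winograd filter transform on it.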
void ConvWinoBasePackWeight(ConvolutionBaseStruct *conv) {
  ConvolutionWinogradBaseStruct *winograd = (ConvolutionWinogradBaseStruct *)conv;
  NNACL_CHECK_NULL_RETURN_VOID(winograd);
  TensorC *weight_tensor = conv->base_.in_[SECOND_INPUT];
  NNACL_CHECK_NULL_RETURN_VOID(weight_tensor);
  void *origin_weight = (conv->base_.train_session_) ? weight_tensor->data_ : conv->origin_weight_;
  NNACL_CHECK_NULL_RETURN_VOID(origin_weight);
  ConvWinoBaseWinogradFilterTransform(winograd, (float *)origin_weight);
}

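// One-time preparation: derives the input/kernel unit sizes from the output unit and kernel size,
// reserves the training work buffer if needed, and initializes the weight/bias data.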
int ConvolutionWinogradBasePrepare(KernelBase *self) {
  NNACL_CHECK_FALSE(self->in_size_ < TWO_TENSOR, NNACL_INPUT_TENSOR_ERROR);
  NNACL_CHECK_FALSE(self->out_size_ < ONE_TENSOR, NNACL_OUTPUT_TENSOR_ERROR);

  ConvolutionWinogradBaseStruct *winograd = (ConvolutionWinogradBaseStruct *)self;
  NNACL_CHECK_NULL_RETURN_ERR(winograd);

  winograd->conv_.init_global_variable_(&winograd->conv_);

  winograd->kernel_unit_ = winograd->conv_.compute_.kernel_h_;
  winograd->input_unit_ = winograd->output_unit_ + winograd->kernel_unit_ - 1;

  if (self->train_session_) {
    TensorC *filter_tensor = self->in_[SECOND_INPUT];
    NNACL_CHECK_NULL_RETURN_ERR(filter_tensor);
    NNACL_CHECK_FALSE(filter_tensor->shape_size_ != DIMENSION_4D, NNACL_CONVOLUTION_WEIGHT_SHAPE_INVALID);

    int input_plane = winograd->input_unit_ * winograd->input_unit_;
    NNACL_CHECK_INT_MUL_NOT_OVERFLOW(input_plane, winograd->conv_.compute_.in_c_, NNACL_ERR);
    int in_chw = input_plane * winograd->conv_.compute_.in_c_;
    NNACL_CHECK_INT_MUL_NOT_OVERFLOW(in_chw, UP_ROUND(winograd->conv_.compute_.out_c_, winograd->oc_block_), NNACL_ERR);
    int trans_matrix_data_size =
      in_chw * UP_ROUND(winograd->conv_.compute_.out_c_, winograd->oc_block_) * sizeof(float);
    self->work_size_ = trans_matrix_data_size;
  }

  return ConvBaseInitConvWeightBias(&winograd->conv_);
}

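// Chooses between batch-parallel and output-tile-parallel execution, and caps the thread count
// by the number of C12 output tiles when cutting by tiles.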
int ConvoWinoBaseUpdateThreadNumProcess(ConvolutionWinogradBaseStruct *winograd) {
  if (winograd->conv_.compute_.in_n_ % winograd->conv_.base_.thread_nr_ == 0) {
    winograd->conv_.use_batch_cut_flag_ = true;
    return NNACL_OK;
  } else {
    winograd->conv_.use_batch_cut_flag_ = false;
  }

  int update_thread = UP_DIV(UP_DIV(winograd->conv_.compute_.out_hw_, C12NUM), ConvMinBlock);
  winograd->conv_.base_.thread_nr_ = NNACL_MIN(update_thread, winograd->conv_.base_.thread_nr_);
  return NNACL_OK;
}

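// Updates thread_nr_ either via the dynamic tile-based heuristic above or via the framework's
// generic UpdateThread callback.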
int ConvoWinoBaseUpdateThread(ConvolutionWinogradBaseStruct *winograd) {
#ifdef DYNAMIC_THREAD_DISTRIBUTE
  ConvoWinoBaseUpdateThreadNumProcess(winograd);
#else
  KernelBase *base = &winograd->conv_.base_;
  base->thread_nr_ = base->UpdateThread(TC_PTYPE(PrimType_Conv2DFusion), 0, 0, 0, base->thread_nr_);
#endif
  return NNACL_OK;
}

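// Resolves the input/output transform functions for the current input_unit_/output_unit_ and activation type.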
int ConvWinoBaseConfigInputOutput(ConvolutionWinogradBaseStruct *winograd) {
  winograd->transfer_functions_.in_func_ = GetInputTransFunc(winograd->input_unit_);
  NNACL_CHECK_NULL_RETURN_ERR(winograd->transfer_functions_.in_func_);

  ConvParameter *conv_param = (ConvParameter *)winograd->conv_.base_.param_;
  winograd->transfer_functions_.out_func_ =
    GetOutputTransFunc(winograd->input_unit_, winograd->output_unit_, conv_param->act_type_);
  NNACL_CHECK_NULL_RETURN_ERR(winograd->transfer_functions_.out_func_);

  return NNACL_OK;
}

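// Allocates the per-compute scratch buffers (transformed input, GEMM output, per-thread tile data,
// column buffer, optimized input transform) and records them in tmp_buffer_address_list_.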
int ConvoWinoBaseInitTmpBuffer(ConvolutionWinogradBaseStruct *winograd) {
  ExecEnv *env = winograd->conv_.base_.env_;
  NNACL_CHECK_NULL_RETURN_ERR(env);

  int thread_input_plane = winograd->conv_.base_.thread_nr_ * winograd->input_unit_ * winograd->input_unit_;
  NNACL_CHECK_INT_MUL_NOT_OVERFLOW(winograd->tile_num_, thread_input_plane, NNACL_ERR);
  int total_thread_input_plane = winograd->tile_num_ * thread_input_plane;
  size_t tile_buffer_size = total_thread_input_plane * winograd->conv_.compute_.in_c_ * sizeof(float);
  winograd->trans_input_ = (float *)env->Alloc(env->allocator_, tile_buffer_size);
  NNACL_MALLOC_CHECK_NULL_RETURN_ERR(winograd->trans_input_);

  int oc8 = UP_ROUND(winograd->conv_.compute_.out_c_, C8NUM);
  winograd->gemm_out_ = env->Alloc(env->allocator_, total_thread_input_plane * oc8 * sizeof(float));
  NNACL_MALLOC_CHECK_NULL_RETURN_ERR(winograd->gemm_out_);

  winograd->tmp_data_ = env->Alloc(env->allocator_, winograd->tmp_data_tile_ * thread_input_plane * sizeof(float));
  NNACL_MALLOC_CHECK_NULL_RETURN_ERR(winograd->tmp_data_);

  winograd->col_buffer_ = env->Alloc(env->allocator_, winograd->conv_.base_.thread_nr_ * winograd->tile_num_ *
                                                        winograd->conv_.compute_.in_c_ * sizeof(float));
  NNACL_MALLOC_CHECK_NULL_RETURN_ERR(winograd->col_buffer_);

  int tile = UP_ROUND(winograd->conv_.compute_.in_c_, winograd->tmp_data_tile_);
  NNACL_CHECK_INT_MUL_NOT_OVERFLOW(total_thread_input_plane, tile, NNACL_ERR);
  winograd->opt_input_trans_ = env->Alloc(env->allocator_, total_thread_input_plane * tile * sizeof(float));
  NNACL_MALLOC_CHECK_NULL_RETURN_ERR(winograd->opt_input_trans_);

  winograd->tmp_buffer_address_list_[Index0] = winograd->trans_input_;
  winograd->tmp_buffer_address_list_[Index1] = winograd->gemm_out_;
  winograd->tmp_buffer_address_list_[Index2] = winograd->tmp_data_;
  winograd->tmp_buffer_address_list_[Index3] = winograd->col_buffer_;
  winograd->tmp_buffer_address_list_[Index4] = winograd->opt_input_trans_;
  return NNACL_OK;
}

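// Per-task body: runs the Winograd convolution for one thread, cut either by batch or by output tiles.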
int ConvWinoBaseRunImpl(ConvolutionBaseStruct *conv, int task_id) {
  ConvolutionWinogradBaseStruct *winograd = (ConvolutionWinogradBaseStruct *)conv;
  NNACL_CHECK_NULL_RETURN_ERR(winograd);
  ConvParameter *conv_param = (ConvParameter *)conv->base_.param_;
  NNACL_CHECK_NULL_RETURN_ERR(conv_param);

  TensorC *input_tensor = conv->base_.in_[FIRST_INPUT];
  NNACL_CHECK_NULL_RETURN_ERR(input_tensor);
  float *input_data = (float *)input_tensor->data_;
  NNACL_CHECK_NULL_RETURN_ERR(input_data);

  TensorC *output_tensor = conv->base_.out_[OUTPUT_INDEX];
  NNACL_CHECK_NULL_RETURN_ERR(output_tensor);
  float *output_data = (float *)output_tensor->data_;
  NNACL_CHECK_NULL_RETURN_ERR(output_data);

  if (conv->use_batch_cut_flag_) {
    ConvWinogardFp32CutByBatch(input_data, (float *)conv->packed_weight_, (float *)conv->bias_data_, output_data,
                               winograd->tmp_buffer_address_list_, task_id, conv_param, winograd->transfer_functions_);
  } else {
    ConvWinogardFp32(input_data, (float *)conv->packed_weight_, (float *)conv->bias_data_, output_data,
                     winograd->tmp_buffer_address_list_, task_id, conv_param, winograd->transfer_functions_);
  }

  return NNACL_OK;
}

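// ParallelLaunch trampoline: forwards each task to the kernel's run_impl_ callback.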
int ConvWinoImpl(void *cdata, int task_id, float l, float r) {
  ConvolutionBaseStruct *conv = (ConvolutionBaseStruct *)cdata;
  NNACL_CHECK_NULL_RETURN_ERR(conv);
  return conv->run_impl_(conv, task_id);
}

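// Propagates the selected Winograd unit sizes back into the ConvParameter.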
void ConvWinoBaseUpdateParam(ConvParameter *param, ConvolutionWinogradBaseStruct *winograd) {
  param->input_unit_ = winograd->input_unit_;
  param->output_unit_ = winograd->output_unit_;
}

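// Re-derives shape-dependent state after a tensor resize: thread split, transform functions,
// and the Winograd unit sizes in the parameter.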
int ConvolutionWinogradBaseResize(KernelBase *self) {
  ConvolutionWinogradBaseStruct *winograd = (ConvolutionWinogradBaseStruct *)self;
  NNACL_CHECK_NULL_RETURN_ERR(winograd);

  int ret = ConvBaseCheckResizeValid(&winograd->conv_);
  if (ret != NNACL_OK) {
    return ret;
  }

  ret = ConvBasePrepare(&winograd->conv_);
  if (ret != NNACL_OK) {
    return ret;
  }

  ret = ConvoWinoBaseUpdateThread(winograd);
  if (ret != NNACL_OK) {
    return ret;
  }

  ret = winograd->config_input_output_(winograd);
  if (ret != NNACL_OK) {
    return ret;
  }

  ConvWinoBaseUpdateParam((ConvParameter *)self->param_, winograd);
  return NNACL_OK;
}

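// Allocates temporary buffers, repacks weights if required, and launches the Winograd convolution
// across the thread pool; temporary buffers are freed on every exit path.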
int ConvolutionWinogradBaseCompute(KernelBase *self) {
  ConvolutionWinogradBaseStruct *winograd = (ConvolutionWinogradBaseStruct *)self;
  NNACL_CHECK_NULL_RETURN_ERR(winograd);

  int ret = ConvoWinoBaseInitTmpBuffer(winograd);
  if (ret != NNACL_OK) {
    ConvWinoBaseFreeTmpBuffer(winograd);
    return ret;
  }

  ret = ConvBaseRepackWeight(&winograd->conv_);
  if (ret != NNACL_OK) {
    ConvWinoBaseFreeTmpBuffer(winograd);
    return ret;
  }

  ret = self->env_->ParallelLaunch(self->env_->thread_pool_, ConvWinoImpl, self, self->thread_nr_);
  ConvWinoBaseFreeTmpBuffer(winograd);
  return ret;
}

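// Releases the convolution base resources (packed weight and bias) owned by this kernel.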
int ConvolutionWinogradBaseRelease(KernelBase *self) {
  ConvolutionBaseStruct *conv = (ConvolutionBaseStruct *)self;
  NNACL_CHECK_NULL_RETURN_ERR(conv);
  ConvBaseRelease(conv);
  return NNACL_OK;
}

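// Creates the base Winograd convolution kernel and wires up its KernelBase callbacks.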
ConvolutionWinogradBaseStruct *CreateConvWinogradBase(ConvParameter *conv_param) {
  ConvolutionWinogradBaseStruct *winograd =
    (ConvolutionWinogradBaseStruct *)malloc(sizeof(ConvolutionWinogradBaseStruct));
  NNACL_MALLOC_CHECK_NULL_RETURN_NULL(winograd);
  memset(winograd, 0, sizeof(ConvolutionWinogradBaseStruct));

  winograd->config_input_output_ = ConvWinoBaseConfigInputOutput;
  winograd->conv_.init_global_variable_ = ConvWinoBaseInitGlobalVariable;

  winograd->conv_.base_.Prepare = ConvolutionWinogradBasePrepare;
  winograd->conv_.base_.Resize = ConvolutionWinogradBaseResize;
  winograd->conv_.base_.Release = ConvolutionWinogradBaseRelease;
  winograd->conv_.base_.Compute = ConvolutionWinogradBaseCompute;
  return (ConvolutionWinogradBaseStruct *)winograd;
}

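/*
 * Minimal usage sketch (illustrative only; the exact driver sequence below is an assumption, since
 * in practice the NNACL kernel framework owns these callbacks and fills in the tensor, environment,
 * and parameter fields before calling them):
 *
 *   ConvolutionWinogradBaseStruct *wino = CreateConvWinogradBase(conv_param);
 *   KernelBase *kernel = &wino->conv_.base_;
 *   // ... framework sets kernel->in_, kernel->out_, kernel->env_, kernel->param_, thread_nr_ ...
 *   kernel->Prepare(kernel);
 *   kernel->Resize(kernel);
 *   kernel->Compute(kernel);
 *   kernel->Release(kernel);
 *   free(wino);
 */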