/**
 * Copyright 2023 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "nnacl/kernel/convolution_im2col_base.h"
#include "nnacl/kernel/convolution_base.h"
#include "nnacl/fp32/pack_fp32.h"
#include "nnacl/fp32/conv_common_fp32.h"

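// Thread-pool entry point: forwards one parallel task to the kernel's run_impl_ hook.
// The float bounds belong to the ParallelLaunch callback signature and are unused here.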
int ConvIm2ColBaseImpl(void *cdata, int task_id, float l, float r) {
  ConvolutionBaseStruct *conv = (ConvolutionBaseStruct *)cdata;
  NNACL_CHECK_NULL_RETURN_ERR(cdata);
  return conv->run_impl_(conv, task_id);
}

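// Per-thread body of the fp32 im2col convolution: cuts the work by batch when
// use_batch_cut_flag_ is set, otherwise by output rows.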
int ConvIm2ColBaseRunImpl(ConvolutionBaseStruct *conv, int task_id) {
  ConvolutionIm2ColBaseStruct *conv_im2col = (ConvolutionIm2ColBaseStruct *)conv;

  float *ori_input_data = (float *)conv->base_.in_[FIRST_INPUT]->data_;
  NNACL_CHECK_NULL_RETURN_ERR(ori_input_data);
  ConvParameter *conv_param = (ConvParameter *)conv->base_.param_;
  NNACL_CHECK_NULL_RETURN_ERR(conv_param);

  if (conv->use_batch_cut_flag_) {
    ConvFp32CutByBatch(ori_input_data, conv_im2col->packed_input_, (float *)conv->packed_weight_,
                       (float *)conv->bias_data_, conv_im2col->col_major_input_, conv_im2col->tmp_output_, task_id,
                       conv_param);
  } else {
    ConvFp32(ori_input_data, conv_im2col->packed_input_, (float *)conv->packed_weight_, (float *)conv->bias_data_,
             conv_im2col->col_major_input_, conv_im2col->tmp_output_, task_id, conv_param);
  }
  return NNACL_OK;
}

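// Allocates the packed weight buffer (out_c rounded up to the oc tile) and a
// zero-filled bias buffer of the same rounded channel count.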
int ConvIm2ColBaseMallocWeightBiasData(ConvolutionBaseStruct *conv) {
  ConvolutionIm2ColBaseStruct *conv_im2col = (ConvolutionIm2ColBaseStruct *)conv;
  NNACL_CHECK_NULL_RETURN_ERR(conv_im2col);

  size_t oc_block_num = UP_ROUND(conv->compute_.out_c_, conv_im2col->oc_tile_);
  size_t pack_weight_size = oc_block_num * conv->compute_.in_c_ * conv->compute_.kernel_hw_;
  if (!conv->base_.train_session_) {
    NNACL_CHECK_MALLOC_SIZE(pack_weight_size * sizeof(float));
    conv->packed_weight_ = ConvBaseGetConvPackWeightData(conv, pack_weight_size * sizeof(float));
    NNACL_MALLOC_CHECK_NULL_RETURN_ERR(conv->packed_weight_);
  }

  if (conv->bias_data_ == NULL) {
    NNACL_CHECK_MALLOC_SIZE(oc_block_num * sizeof(float));
    conv->bias_data_ = conv->base_.env_->Alloc(conv->base_.env_->allocator_, oc_block_num * sizeof(float));
    NNACL_MALLOC_CHECK_NULL_RETURN_ERR(conv->bias_data_);
  }
  memset(conv->bias_data_, 0, oc_block_num * sizeof(float));
  return NNACL_OK;
}

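// Chooses the parallel split: cut by batch when the batch count divides evenly among the
// threads, otherwise cap thread_nr_ by the number of output-row blocks. Without
// DYNAMIC_THREAD_DISTRIBUTE the thread count is only clamped to at least 1.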
int ConvIm2ColBaseUpdateThreadNumProcess(KernelBase *self, int32_t kernel_type, int64_t per_unit_load_num,
                                         int64_t per_unit_store_num, int64_t unit_num) {
#ifdef DYNAMIC_THREAD_DISTRIBUTE
  ConvolutionIm2ColBaseStruct *conv_im2col = (ConvolutionIm2ColBaseStruct *)self;
  NNACL_CHECK_NULL_RETURN_ERR(conv_im2col);

  if (conv_im2col->conv_.compute_.in_n_ % self->thread_nr_ == 0) {
    conv_im2col->conv_.use_batch_cut_flag_ = true;
    return NNACL_OK;
  } else {
    conv_im2col->conv_.use_batch_cut_flag_ = false;
  }

  int update_thread = UP_DIV(UP_DIV(conv_im2col->conv_.compute_.out_hw_, conv_im2col->row_tile_), ConvMinBlock);
  self->thread_nr_ = NNACL_MIN(self->thread_nr_, update_thread);
#else
  self->thread_nr_ = self->thread_nr_ > 0 ? self->thread_nr_ : 1;
#endif
  return NNACL_OK;
}

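// Releases the per-run scratch buffers; the temporary output is freed only when it was
// allocated separately from the output tensor (output_need_align_).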
void ConvIm2ColBaseFreeTmpBuffer(ConvolutionIm2ColBaseStruct *conv_im2col) {
  ExecEnv *env = conv_im2col->conv_.base_.env_;
  NNACL_CHECK_NULL_RETURN_VOID(env);

  if (conv_im2col->packed_input_ != NULL) {
    env->Free(env->allocator_, conv_im2col->packed_input_);
    conv_im2col->packed_input_ = NULL;
  }
  if (conv_im2col->col_major_input_ != NULL) {
    env->Free(env->allocator_, conv_im2col->col_major_input_);
    conv_im2col->col_major_input_ = NULL;
  }
  if (conv_im2col->output_need_align_ && conv_im2col->tmp_output_ != NULL) {
    env->Free(env->allocator_, conv_im2col->tmp_output_);
    conv_im2col->tmp_output_ = NULL;
    conv_im2col->output_need_align_ = false;
  }
}

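// Allocates the im2col scratch buffers packed_input_ and col_major_input_, each sized
// row_tile_ * kernel_hw_ * in_c_ floats per thread, with overflow checks on every multiply.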
int ConvIm2ColBaseInitTmpBuffer(ConvolutionIm2ColBaseStruct *conv_im2col) {
  ConvolutionBaseStruct *conv = (ConvolutionBaseStruct *)conv_im2col;
  TensorC *out_tensor = conv_im2col->conv_.base_.out_[OUTPUT_INDEX];
  NNACL_CHECK_NULL_RETURN_ERR(out_tensor);
  NNACL_CHECK_NULL_RETURN_ERR(out_tensor->data_);

  NNACL_CHECK_INT_MUL_NOT_OVERFLOW(conv->compute_.kernel_hw_, conv->compute_.in_c_, NNACL_ERR);
  int kernel_chw = conv->compute_.kernel_hw_ * conv->compute_.in_c_;
  NNACL_CHECK_INT_MUL_NOT_OVERFLOW(kernel_chw, conv->base_.thread_nr_, NNACL_ERR);
  int total_kernel_chw = kernel_chw * conv->base_.thread_nr_;
  NNACL_CHECK_INT_MUL_NOT_OVERFLOW(total_kernel_chw, conv_im2col->row_tile_, NNACL_ERR);
  int unit_size = total_kernel_chw * conv_im2col->row_tile_;

  if (conv_im2col->packed_input_ != NULL) {
    conv->base_.env_->Free(conv->base_.env_->allocator_, conv_im2col->packed_input_);
    conv_im2col->packed_input_ = NULL;
  }
  conv_im2col->packed_input_ =
    (float *)conv->base_.env_->Alloc(conv->base_.env_->allocator_, unit_size * sizeof(float));
  NNACL_MALLOC_CHECK_NULL_RETURN_ERR(conv_im2col->packed_input_);

  if (conv_im2col->col_major_input_ != NULL) {
    conv->base_.env_->Free(conv->base_.env_->allocator_, conv_im2col->col_major_input_);
    conv_im2col->col_major_input_ = NULL;
  }
  conv_im2col->col_major_input_ =
    (float *)conv->base_.env_->Alloc(conv->base_.env_->allocator_, unit_size * sizeof(float));
  NNACL_MALLOC_CHECK_NULL_RETURN_ERR(conv_im2col->col_major_input_);

  return NNACL_OK;
}

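// Packs the origin weight from row-major [out_c, in_c * kernel_hw] into the tiled
// column-major layout expected by the matmul kernels.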
void ConvIm2ColBasePackWeight(ConvolutionBaseStruct *conv) {
  void *origin_weight = (conv->base_.train_session_) ? conv->base_.in_[SECOND_INPUT]->data_ : conv->origin_weight_;
  NNACL_CHECK_NULL_RETURN_VOID(origin_weight);

  ConvolutionIm2ColBaseStruct *conv_im2col = (ConvolutionIm2ColBaseStruct *)conv;
  NNACL_CHECK_NULL_RETURN_VOID(conv_im2col->row_major_to_col_nmajor_);
  conv_im2col->row_major_to_col_nmajor_((float *)origin_weight, (float *)conv->packed_weight_, conv->compute_.out_c_,
                                        conv->compute_.in_c_ * conv->compute_.kernel_hw_);
}

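// Default tiling for this base kernel: 8-channel output blocks and 12-row input blocks,
// packed with RowMajor2Col8Major.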
void ConvIm2ColBaseInitGlobalVariable(ConvolutionBaseStruct *conv) {
  ConvolutionIm2ColBaseStruct *conv_im2col = (ConvolutionIm2ColBaseStruct *)conv;
  conv_im2col->oc_tile_ = C8NUM;
  conv_im2col->row_tile_ = C12NUM;
  conv_im2col->row_major_to_col_nmajor_ = RowMajor2Col8Major;
}

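// Release hook: delegates cleanup to ConvBaseRelease.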
int ConvolutionIm2colBaseRelease(KernelBase *self) {
  ConvolutionBaseStruct *conv = (ConvolutionBaseStruct *)self;
  NNACL_CHECK_NULL_RETURN_ERR(conv);
  ConvBaseRelease(conv);
  return NNACL_OK;
}

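// Compute hook: prepares the scratch buffers, repacks the weight if needed, runs
// ConvIm2ColBaseImpl across thread_nr_ threads, and frees the scratch buffers afterwards.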
int ConvolutionIm2colBaseCompute(KernelBase *self) {
  ConvolutionIm2ColBaseStruct *conv_im2col = (ConvolutionIm2ColBaseStruct *)self;
  NNACL_CHECK_NULL_RETURN_ERR(conv_im2col);

  int ret = conv_im2col->init_tmp_buffer_(conv_im2col);
  if (ret != NNACL_OK) {
    ConvIm2ColBaseFreeTmpBuffer(conv_im2col);
    return ret;
  }

  float *output_addr = self->out_[OUTPUT_INDEX]->data_;
  NNACL_CHECK_NULL_RETURN_ERR(output_addr);
  if (!conv_im2col->output_need_align_) {
    conv_im2col->tmp_output_ = output_addr;
  }

  ret = ConvBaseRepackWeight(&conv_im2col->conv_);
  if (ret != NNACL_OK) {
    ConvIm2ColBaseFreeTmpBuffer(conv_im2col);
    return ret;
  }

  ret = self->env_->ParallelLaunch(self->env_->thread_pool_, ConvIm2ColBaseImpl, self, self->thread_nr_);
  ConvIm2ColBaseFreeTmpBuffer(conv_im2col);
  return ret;
}

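// Resize hook: validates the new shapes, refreshes the base compute info, then re-derives
// the thread split for the new output size.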
int ConvolutionIm2colBaseResize(KernelBase *self) {
  ConvolutionBaseStruct *conv = (ConvolutionBaseStruct *)self;
  NNACL_CHECK_NULL_RETURN_ERR(conv);

  int ret = ConvBaseCheckResizeValid(conv);
  if (ret != NNACL_OK) {
    return ret;
  }

  ret = ConvBasePrepare(conv);
  if (ret != NNACL_OK) {
    return ret;
  }

  return ConvIm2ColBaseUpdateThreadNumProcess(self, TC_PTYPE(PrimType_Conv2DFusion), 0, 0, 0);
}

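// Prepare hook: checks tensor counts, initializes the tile configuration, reserves the
// packed-weight workspace for training sessions, and initializes the packed weight and bias.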
int ConvolutionIm2colBasePrepare(KernelBase *self) {
  NNACL_CHECK_FALSE(self->in_size_ < TWO_TENSOR, NNACL_INPUT_TENSOR_ERROR);
  NNACL_CHECK_FALSE(self->out_size_ < ONE_TENSOR, NNACL_OUTPUT_TENSOR_ERROR);

  ConvolutionIm2ColBaseStruct *conv_im2col = (ConvolutionIm2ColBaseStruct *)self;
  NNACL_CHECK_NULL_RETURN_ERR(conv_im2col);

  conv_im2col->conv_.init_global_variable_(&conv_im2col->conv_);

  if (self->train_session_) {
    int oc_block_num = UP_ROUND(conv_im2col->conv_.compute_.out_c_, conv_im2col->oc_tile_);
    int kernel_chw = conv_im2col->conv_.compute_.in_c_ * conv_im2col->conv_.compute_.kernel_hw_;
    NNACL_CHECK_INT_MUL_NOT_OVERFLOW(oc_block_num, kernel_chw, NNACL_ERR);
    int pack_weight_size = oc_block_num * kernel_chw;
    self->work_size_ = pack_weight_size * sizeof(float);
  }

  return ConvBaseInitConvWeightBias(&conv_im2col->conv_);
}

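// Factory: allocates the im2col base kernel, zero-initializes it, and wires the
// convolution hooks and KernelBase callbacks.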
ConvolutionBaseStruct *CreateConvIm2ColBase(ConvParameter *conv_param) {
  ConvolutionIm2ColBaseStruct *conv_im2col =
    (ConvolutionIm2ColBaseStruct *)malloc(sizeof(ConvolutionIm2ColBaseStruct));
  NNACL_MALLOC_CHECK_NULL_RETURN_NULL(conv_im2col);
  memset(conv_im2col, 0, sizeof(ConvolutionIm2ColBaseStruct));

  conv_im2col->init_tmp_buffer_ = ConvIm2ColBaseInitTmpBuffer;

  conv_im2col->conv_.malloc_weight_bias_ = ConvIm2ColBaseMallocWeightBiasData;
  conv_im2col->conv_.run_impl_ = ConvIm2ColBaseRunImpl;
  conv_im2col->conv_.pack_weight_ = ConvIm2ColBasePackWeight;
  conv_im2col->conv_.init_global_variable_ = ConvIm2ColBaseInitGlobalVariable;

  conv_im2col->conv_.base_.Compute = ConvolutionIm2colBaseCompute;
  conv_im2col->conv_.base_.Prepare = ConvolutionIm2colBasePrepare;
  conv_im2col->conv_.base_.Resize = ConvolutionIm2colBaseResize;
  conv_im2col->conv_.base_.Release = ConvolutionIm2colBaseRelease;

  return (ConvolutionBaseStruct *)conv_im2col;
}