• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2023 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #if defined(ENABLE_AVX) || defined(ENABLE_ARM64)
18 #include "nnacl/kernel/convolution_slidewindow.h"
19 #include "nnacl/fp32/conv_depthwise_fp32.h"
20 #include "nnacl/fp32/pack_fp32.h"
21 #include "nnacl/tensor_c.h"
22 #include "nnacl/tensor_c_utils.h"
23 
/* Set up the per-run temporary buffers for the sliding-window convolution.
 * When a channel count is not a multiple of the SIMD tile (ic_res_ / oc_res_
 * non-zero), an aligned scratch buffer is allocated (and the input is packed);
 * otherwise the tensor's own memory is aliased directly.
 * Returns NNACL_OK on success, or an NNACL error code. */
int ConvSWInitTmpBuffer(ConvolutionSWStruct *conv_sw) {
  TensorC *input_tensor = conv_sw->conv_.base_.in_[FIRST_INPUT];
  NNACL_CHECK_NULL_RETURN_ERR(input_tensor);
  float *input_data = (float *)input_tensor->data_;
  NNACL_CHECK_NULL_RETURN_ERR(input_data);
  ConvComputeParam *compute = &conv_sw->conv_.compute_;
  NNACL_CHECK_NULL_RETURN_ERR(compute);

  /* Input is repacked only for 1x1 kernels whose input channel count is not a
   * multiple of in_tile_; every other shape reads the raw NHWC input. */
  if (conv_sw->ic_res_ != 0 && compute->kernel_h_ == 1 && compute->kernel_w_ == 1) {
    int ic_block_num = UP_DIV(compute->in_c_, conv_sw->in_tile_);
    NNACL_CHECK_INT_MUL_NOT_OVERFLOW(compute->in_n_, compute->in_hw_, NNACL_ERR);
    int input_bhw = compute->in_n_ * conv_sw->conv_.compute_.in_hw_;
    NNACL_CHECK_INT_MUL_NOT_OVERFLOW(input_bhw, ic_block_num * conv_sw->in_tile_, NNACL_ERR);

    conv_sw->input_data_ = (float *)conv_sw->conv_.base_.env_->Alloc(
      conv_sw->conv_.base_.env_->allocator_, input_bhw * ic_block_num * conv_sw->in_tile_ * sizeof(float));
    NNACL_MALLOC_CHECK_NULL_RETURN_ERR(conv_sw->input_data_);

    /* NOTE(review): the scratch buffer above is sized using in_tile_, but the
     * pack below passes oc_tile_ as the channel-block size. This is only safe
     * if the two tiles are equal — confirm against init_global_variable_. */
    PackNHWCToNHWCXFp32(input_data, conv_sw->input_data_, compute->in_n_, compute->in_hw_, compute->in_c_,
                        conv_sw->oc_tile_);
  } else {
    conv_sw->input_data_ = input_data;
  }

  float *out_data = (float *)conv_sw->conv_.base_.out_[OUTPUT_INDEX]->data_;
  NNACL_CHECK_NULL_RETURN_ERR(out_data);
  if (conv_sw->oc_res_ == 0) {  // not need to malloc dst
    conv_sw->output_data_ = out_data;
  } else {  // need to malloc dst to align block
    /* Output channels padded up to a whole number of oc_tile_ blocks; the
     * aligned result is unpacked back to NHWC after the kernel runs. */
    int oc_block_num = UP_DIV(compute->out_c_, conv_sw->oc_tile_);
    NNACL_CHECK_INT_MUL_NOT_OVERFLOW(compute->out_n_, compute->out_hw_, NNACL_ERR);
    int output_bhw = compute->out_n_ * compute->out_hw_;
    NNACL_CHECK_INT_MUL_NOT_OVERFLOW(output_bhw, oc_block_num * conv_sw->oc_tile_, NNACL_ERR);
    conv_sw->output_data_ = (float *)conv_sw->conv_.base_.env_->Alloc(
      conv_sw->conv_.base_.env_->allocator_, output_bhw * oc_block_num * conv_sw->oc_tile_ * sizeof(float));
    NNACL_MALLOC_CHECK_NULL_RETURN_ERR(conv_sw->output_data_);
  }

  return NNACL_OK;
}
64 
/* Release the per-run scratch buffers created by ConvSWInitTmpBuffer.
 * Buffers that merely alias tensor memory (the res == 0 paths) are not
 * freed; freed pointers are nulled to guard against double-free. */
void ConvSWFreeTmpBuffer(ConvolutionSWStruct *conv_sw) {
  ConvParameter *param = (ConvParameter *)conv_sw->conv_.base_.param_;
  NNACL_CHECK_NULL_RETURN_VOID(param);
  KernelBase *base = &conv_sw->conv_.base_;

  /* An input scratch buffer exists only for the 1x1-kernel repack path. */
  if (conv_sw->input_data_ != NULL && conv_sw->ic_res_ != 0 && param->kernel_w_ == 1 && param->kernel_h_ == 1) {
    base->env_->Free(base->env_->allocator_, conv_sw->input_data_);
    conv_sw->input_data_ = NULL;
  }
  if (conv_sw->output_data_ != NULL && conv_sw->oc_res_ != 0) {
    base->env_->Free(base->env_->allocator_, conv_sw->output_data_);
    conv_sw->output_data_ = NULL;
  }
}
79 
/* Repack the convolution weights into the NXHWCX layout expected by the
 * sliding-window kernels. */
void ConvSWPackWeight(ConvolutionBaseStruct *conv) {
  ConvolutionSWStruct *conv_sw = (ConvolutionSWStruct *)conv;
  NNACL_CHECK_NULL_RETURN_VOID(conv_sw);
  TensorC *weight = conv->base_.in_[SECOND_INPUT];
  NNACL_CHECK_NULL_RETURN_VOID(weight);

  /* Filter tensor layout: batch dim = output channels, channel dim = input
   * channels. */
  int out_c = GetBatch(weight);
  int in_c = GetChannel(weight);
  int oc_blocks = UP_DIV(out_c, conv_sw->oc_tile_);

  /* In a train session the weights may change between runs, so read them
   * live from the tensor; otherwise use the cached origin weights. */
  void *src = conv->base_.train_session_ ? weight->data_ : conv->origin_weight_;
  NNACL_CHECK_NULL_RETURN_VOID(src);
  PackNHWCToNXHWCXFp32(GetHeight(weight), GetWidth(weight), out_c, oc_blocks, in_c, (float *)conv->packed_weight_,
                       (float *)src);
}
97 
/* Allocate the packed-weight and bias buffers for the sliding-window
 * convolution kernel.
 * Packed-weight size: oc_block_num blocks of oc_tile_ output channels, each
 * spanning input_channel * kernel_h * kernel_w elements.
 * Returns NNACL_OK on success, or an NNACL error code.
 * Fix: the size products are now overflow-checked, mirroring the checks the
 * train-session path in ConvolutionSWPrepare already performs; the bias
 * memset reuses malloc_size instead of recomputing it. */
int ConvSWMallocWeightBiasData(ConvolutionBaseStruct *conv) {
  ConvolutionSWStruct *conv_sw = (ConvolutionSWStruct *)conv;
  NNACL_CHECK_NULL_RETURN_ERR(conv_sw);
  ConvParameter *conv_param = (ConvParameter *)conv->base_.param_;
  NNACL_CHECK_NULL_RETURN_ERR(conv_param);
  TensorC *filter_tensor = conv->base_.in_[SECOND_INPUT];
  NNACL_CHECK_NULL_RETURN_ERR(filter_tensor);

  int input_channel = GetChannel(filter_tensor);
  int output_channel = GetBatch(filter_tensor);
  int kernel_h = GetHeight(filter_tensor);
  int kernel_w = GetWidth(filter_tensor);

  conv_param->input_channel_ = input_channel;
  conv_param->output_channel_ = output_channel;

  NNACL_CHECK_INT_MUL_NOT_OVERFLOW(kernel_h, kernel_w, NNACL_ERR);
  int kernel_plane = kernel_h * kernel_w;
  int oc_block_num = UP_DIV(output_channel, conv_sw->oc_tile_);
  NNACL_CHECK_INT_MUL_NOT_OVERFLOW(input_channel, kernel_plane, NNACL_ERR);
  int kernel_chw = input_channel * kernel_plane;
  NNACL_CHECK_INT_MUL_NOT_OVERFLOW(oc_block_num * conv_sw->oc_tile_, kernel_chw, NNACL_ERR);
  int pack_weight_size = oc_block_num * conv_sw->oc_tile_ * kernel_chw;

  if (!conv_sw->conv_.base_.train_session_) {
    conv_sw->conv_.packed_weight_ = ConvBaseGetConvPackWeightData(conv, pack_weight_size * sizeof(float));
    NNACL_MALLOC_CHECK_NULL_RETURN_ERR(conv_sw->conv_.packed_weight_);
  }

  if (conv_sw->conv_.base_.in_size_ == THREE_TENSOR) {
    /* Bias is padded to a whole number of oc_tile_ blocks and zero-filled so
     * the tail channels contribute 0. */
    int malloc_size = oc_block_num * conv_sw->oc_tile_ * sizeof(float);
    conv->bias_data_ = conv->base_.env_->Alloc(conv->base_.env_->allocator_, malloc_size);
    NNACL_MALLOC_CHECK_NULL_RETURN_ERR(conv->bias_data_);
    memset(conv->bias_data_, 0, malloc_size);
  }
  return NNACL_OK;
}
129 
ConvSWImpl(void * cdata,int task_id,float l,float r)130 int ConvSWImpl(void *cdata, int task_id, float l, float r) {
131   ConvolutionSWStruct *conv_sw = (ConvolutionSWStruct *)cdata;
132   NNACL_CHECK_NULL_RETURN_ERR(conv_sw);
133   return conv_sw->conv_.run_impl_(&conv_sw->conv_, task_id);
134 }
135 
/* Run one sliding-window convolution: prepare temp buffers, repack weights
 * if needed, launch the parallel kernel, and unpack the aligned output back
 * to NHWC when the output channels were padded (oc_res_ != 0).
 * Returns NNACL_OK on success, or an NNACL error code.
 * Fix: the original returned early from the oc_res_ tail path (null param /
 * null output data) WITHOUT freeing the temp buffers allocated by
 * ConvSWInitTmpBuffer; a single goto-chain cleanup now frees on every exit. */
int ConvolutionSWCompute(KernelBase *self) {
  ConvolutionSWStruct *conv_sw = (ConvolutionSWStruct *)self;
  NNACL_CHECK_NULL_RETURN_ERR(conv_sw);

  int ret = ConvSWInitTmpBuffer(conv_sw);
  if (ret != NNACL_OK) {
    goto free_tmp;
  }

  ret = ConvBaseRepackWeight(&conv_sw->conv_);
  if (ret != NNACL_OK) {
    goto free_tmp;
  }

  ret = self->env_->ParallelLaunch(self->env_->thread_pool_, ConvSWImpl, self, self->thread_nr_);
  if (ret != NNACL_OK) {
    goto free_tmp;
  }

  if (conv_sw->oc_res_ != 0) {
    /* Output was computed into an oc_tile_-aligned scratch buffer; unpack it
     * into the real NHWC output tensor. */
    ConvParameter *conv_param = (ConvParameter *)self->param_;
    float *out_data = (float *)self->out_[OUTPUT_INDEX]->data_;
    if (conv_param == NULL || out_data == NULL) {
      ret = NNACL_NULL_PTR;
      goto free_tmp;
    }
    PackNHWCXToNHWCFp32(conv_sw->output_data_, out_data, conv_param->output_batch_,
                        conv_param->output_h_ * conv_param->output_w_, conv_param->output_channel_, conv_sw->oc_tile_);
  }

free_tmp:
  ConvSWFreeTmpBuffer(conv_sw);
  return ret;
}
170 
/* Release the resources held by the convolution base (packed weights, bias). */
int ConvolutionSWRelease(KernelBase *self) {
  NNACL_CHECK_NULL_RETURN_ERR(self);
  ConvBaseRelease((ConvolutionBaseStruct *)self);
  return NNACL_OK;
}
177 
/* Re-derive shape-dependent state after an input resize: validate the new
 * shapes, refresh the base compute parameters, and rebuild the sliding
 * window parameters from the current tile sizes. */
int ConvolutionSWResize(KernelBase *self) {
  ConvolutionSWStruct *conv_sw = (ConvolutionSWStruct *)self;
  NNACL_CHECK_NULL_RETURN_ERR(conv_sw);
  ConvParameter *param = (ConvParameter *)self->param_;
  NNACL_CHECK_NULL_RETURN_ERR(param);

  int ret = ConvBaseCheckResizeValid(&conv_sw->conv_);
  if (ret == NNACL_OK) {
    ret = ConvBasePrepare(&conv_sw->conv_);
  }
  if (ret != NNACL_OK) {
    return ret;
  }

  InitSlidingParamConv(&conv_sw->sw_param_, param, conv_sw->in_tile_, conv_sw->oc_tile_);
  return NNACL_OK;
}
197 
/* One-time preparation: initialize the tile configuration and the packed
 * weight/bias buffers. For train sessions the packed-weight size is computed
 * here (overflow-checked) and published as the kernel's work size. */
int ConvolutionSWPrepare(KernelBase *self) {
  ConvolutionSWStruct *conv_sw = (ConvolutionSWStruct *)self;
  NNACL_CHECK_NULL_RETURN_ERR(conv_sw);

  conv_sw->conv_.init_global_variable_(&conv_sw->conv_);

  if (self->train_session_) {
    TensorC *weight = self->in_[SECOND_INPUT];
    NNACL_CHECK_NULL_RETURN_ERR(weight);
    NNACL_CHECK_FALSE(weight->shape_size_ != DIMENSION_4D, NNACL_CONVOLUTION_WEIGHT_SHAPE_INVALID);

    /* Filter layout: batch = output channels, channel = input channels. */
    int in_c = GetChannel(weight);
    int out_c = GetBatch(weight);
    int k_h = GetHeight(weight);
    int k_w = GetWidth(weight);

    NNACL_CHECK_INT_MUL_NOT_OVERFLOW(k_h, k_w, NNACL_ERR);
    int kernel_hw = k_h * k_w;
    int oc_padded = UP_DIV(out_c, conv_sw->oc_tile_) * conv_sw->oc_tile_;
    NNACL_CHECK_INT_MUL_NOT_OVERFLOW(in_c, kernel_hw, NNACL_ERR);
    int kernel_chw = in_c * kernel_hw;
    NNACL_CHECK_INT_MUL_NOT_OVERFLOW(oc_padded, kernel_chw, NNACL_ERR);

    conv_sw->conv_.base_.work_size_ = oc_padded * kernel_chw * sizeof(float);
  }

  return ConvBaseInitConvWeightBias(&conv_sw->conv_);
}
227 #endif
228