/**
 * Copyright 2023 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "nnacl/kernel/convolution_depthwise.h"
#include "nnacl/tensor_c_utils.h"
#include "nnacl/base/conv_common_base.h"
#include "nnacl/fp32/conv_depthwise_fp32.h"
#include "nnacl/fp32/pack_fp32.h"
#ifdef ENABLE_AVX512
#include "nnacl/intrinsics/ms_simd_cpu_info.h"
#endif
#include "nnacl/fp32/conv_depthwise_avx_fp32.h"

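/* Per-thread worker for ParallelLaunch: computes one slice (task_id) of the depthwise
 * convolution, dispatching to the AVX512, AVX, or generic C kernel depending on the
 * build flags and, for AVX512, runtime CPU support. */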
int ConvDwRun(void *cdata, int task_id, float l, float r) {
  ConvolutionDepthwiseStruct *conv_dw = (ConvolutionDepthwiseStruct *)cdata;
  NNACL_CHECK_NULL_RETURN_ERR(conv_dw);

  ConvParameter *conv_param = (ConvParameter *)conv_dw->conv_.base_.param_;
  NNACL_CHECK_NULL_RETURN_ERR(conv_param);

#ifdef ENABLE_AVX512
  if (X86_Avx512_Support()) {
    return ConvDwAVX512(conv_dw->output_ptr_, conv_dw->input_ptr_, (float *)conv_dw->conv_.packed_weight_,
                        (float *)conv_dw->conv_.bias_data_, conv_param, task_id, &conv_dw->dw_param_);
  } else {
    return ConvDwAVX(conv_dw->output_ptr_, conv_dw->input_ptr_, (float *)conv_dw->conv_.packed_weight_,
                     (float *)conv_dw->conv_.bias_data_, conv_param, task_id, &conv_dw->dw_param_);
  }
#endif

#ifdef ENABLE_AVX
  return ConvDwAVX(conv_dw->output_ptr_, conv_dw->input_ptr_, (float *)conv_dw->conv_.packed_weight_,
                   (float *)conv_dw->conv_.bias_data_, conv_param, task_id, &conv_dw->dw_param_);
#endif

  return ConvDw(conv_dw->output_ptr_, conv_dw->input_ptr_, (float *)conv_dw->conv_.packed_weight_,
                (float *)conv_dw->conv_.bias_data_, conv_param, task_id);
}

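/* Frees the per-kernel-width sliding-window buffers (num_pixels_, out_w_start_,
 * out_w_end_) held in dw_param_ and resets the pointers to NULL. */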
void ConvDwReleaseParam(ConvolutionDepthwiseStruct *conv_dw) {
  ExecEnv *env = conv_dw->conv_.base_.env_;
  NNACL_CHECK_NULL_RETURN_VOID(env);

  if (conv_dw->dw_param_.num_pixels_ != NULL) {
    env->Free(env->allocator_, conv_dw->dw_param_.num_pixels_);
    conv_dw->dw_param_.num_pixels_ = NULL;
  }
  if (conv_dw->dw_param_.out_w_start_ != NULL) {
    env->Free(env->allocator_, conv_dw->dw_param_.out_w_start_);
    conv_dw->dw_param_.out_w_start_ = NULL;
  }
  if (conv_dw->dw_param_.out_w_end_ != NULL) {
    env->Free(env->allocator_, conv_dw->dw_param_.out_w_end_);
    conv_dw->dw_param_.out_w_end_ = NULL;
  }
}

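/* Repacks the origin weight tensor from KHW to HWK layout for the depthwise kernels. */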
void ConvDwPackWeight(ConvolutionBaseStruct *conv) {
  void *origin_data = conv->base_.in_[SECOND_INPUT]->data_;
  NNACL_CHECK_NULL_RETURN_VOID(origin_data);
  PackWeightKHWToHWKFp32(origin_data, conv->packed_weight_, conv->compute_.kernel_hw_, conv->compute_.out_c_);
}

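/* Allocates the packed weight buffer (skipped for train sessions) and a zero-filled
 * bias buffer sized to the output channel count. */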
int ConvDwMallocWeightBiasData(ConvolutionBaseStruct *conv) {
  TensorC *weight_tensor = conv->base_.in_[SECOND_INPUT];
  NNACL_MALLOC_CHECK_NULL_RETURN_ERR(weight_tensor);

  int pack_weight_size = conv->compute_.kernel_hw_ * conv->compute_.out_c_;
  NNACL_CHECK_INT_MUL_NOT_OVERFLOW(pack_weight_size, sizeof(float), NNACL_ERR);

  if (!conv->base_.train_session_) {
    NNACL_CHECK_MALLOC_SIZE(pack_weight_size * sizeof(float));
    conv->packed_weight_ = ConvBaseGetConvPackWeightData(conv, pack_weight_size * sizeof(float));
    NNACL_MALLOC_CHECK_NULL_RETURN_ERR(conv->packed_weight_);
  }

  NNACL_CHECK_MALLOC_SIZE(conv->compute_.out_c_ * sizeof(float));
  if (conv->bias_data_ == NULL) {
    conv->bias_data_ = conv->base_.env_->Alloc(conv->base_.env_->allocator_, conv->compute_.out_c_ * sizeof(float));
    NNACL_MALLOC_CHECK_NULL_RETURN_ERR(conv->bias_data_);
  }
  memset(conv->bias_data_, 0, conv->compute_.out_c_ * sizeof(float));
  return NNACL_OK;
}

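/* For every kernel-width offset kw, precomputes the range of output columns
 * [out_w_start, out_w_end) that stays inside the padded input, the number of pixels
 * in that range, and the first kw that covers the full output width (first_calc_kw_). */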
int ConvDwInitConvDwCalcInfo(ConvolutionDepthwiseStruct *conv_dw) {
  ExecEnv *env = conv_dw->conv_.base_.env_;
  NNACL_CHECK_NULL_RETURN_ERR(env);
  ConvComputeParam *compute = &conv_dw->conv_.compute_;
  NNACL_CHECK_NULL_RETURN_ERR(compute);

  ConvDwReleaseParam(conv_dw);

  conv_dw->dw_param_.num_pixels_ = env->Alloc(env->allocator_, compute->kernel_w_ * sizeof(int));
  NNACL_MALLOC_CHECK_NULL_RETURN_ERR(conv_dw->dw_param_.num_pixels_);

  conv_dw->dw_param_.out_w_start_ = env->Alloc(env->allocator_, compute->kernel_w_ * sizeof(int));
  NNACL_MALLOC_CHECK_NULL_RETURN_ERR(conv_dw->dw_param_.out_w_start_);

  conv_dw->dw_param_.out_w_end_ = env->Alloc(env->allocator_, compute->kernel_w_ * sizeof(int));
  NNACL_MALLOC_CHECK_NULL_RETURN_ERR(conv_dw->dw_param_.out_w_end_);

  int *num_pixels = (int *)(conv_dw->dw_param_.num_pixels_);
  int *out_w_start = (int *)(conv_dw->dw_param_.out_w_start_);
  int *out_w_end = (int *)(conv_dw->dw_param_.out_w_end_);
  conv_dw->dw_param_.first_calc_kw_ = -1;
  NNACL_CHECK_INT_MUL_NOT_OVERFLOW(compute->dilation_w_, (compute->kernel_w_ - 1), NNACL_ERR);
  for (int kw = 0; kw < compute->kernel_w_; kw++) {
    out_w_start[kw] =
      NNACL_MAX(0, (compute->pad_l_ - compute->dilation_w_ * kw + compute->stride_w_ - 1) / compute->stride_w_);

    out_w_end[kw] = NNACL_MIN(
      (compute->in_w_ + compute->pad_l_ - compute->dilation_w_ * kw + compute->stride_w_ - 1) / compute->stride_w_,
      compute->out_w_);

    num_pixels[kw] = out_w_end[kw] - out_w_start[kw];
    if (conv_dw->dw_param_.first_calc_kw_ == -1 && out_w_start[kw] == 0 && num_pixels[kw] == compute->out_w_) {
      conv_dw->dw_param_.first_calc_kw_ = kw;
    }
  }
  return NNACL_OK;
}

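/* Prepare: checks the tensor counts, refreshes the origin weight/bias pointers,
 * reserves the packing work buffer for train sessions, and initializes the packed
 * weight and bias via the conv base. */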
int ConvolutionDepthwisePrepare(KernelBase *self) {
  NNACL_CHECK_FALSE(self->in_size_ < TWO_TENSOR, NNACL_INPUT_TENSOR_ERROR);
  NNACL_CHECK_FALSE(self->out_size_ < ONE_TENSOR, NNACL_OUTPUT_TENSOR_ERROR);

  ConvolutionDepthwiseStruct *conv_dw = (ConvolutionDepthwiseStruct *)self;
  NNACL_CHECK_NULL_RETURN_ERR(conv_dw);

  ConvBaseUpdateOriginWeightAndBias(&conv_dw->conv_);

  if (self->train_session_) {
    TensorC *weight_tensor = self->in_[SECOND_INPUT];
    NNACL_CHECK_NULL_RETURN_ERR(weight_tensor);
    NNACL_CHECK_TRUE_RET(weight_tensor->shape_size_ == DIMENSION_4D, NNACL_CONVOLUTION_WEIGHT_SHAPE_INVALID);

    int weight_size_hw = GetHeight(weight_tensor) * GetWidth(weight_tensor);
    NNACL_CHECK_INT_MUL_NOT_OVERFLOW(GetBatch(weight_tensor), weight_size_hw, NNACL_ERR);
    int pack_weight_size = GetBatch(weight_tensor) * weight_size_hw;
    NNACL_CHECK_INT_MUL_NOT_OVERFLOW(pack_weight_size, sizeof(float), NNACL_ERR);
    self->work_size_ = pack_weight_size * sizeof(float);
  }

  return ConvBaseInitConvWeightBias(&conv_dw->conv_);
}

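/* Compute: repacks the weight if needed, binds the input/output data pointers,
 * verifies the sliding-window info is present, then launches ConvDwRun on the
 * thread pool. */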
int ConvolutionDepthwiseCompute(KernelBase *self) {
  ConvolutionDepthwiseStruct *conv_dw = (ConvolutionDepthwiseStruct *)self;
  NNACL_CHECK_NULL_RETURN_ERR(conv_dw);

  int ret = ConvBaseRepackWeight(&conv_dw->conv_);
  if (ret != NNACL_OK) {
    return ret;
  }

  TensorC *input_tensor = self->in_[FIRST_INPUT];
  NNACL_CHECK_NULL_RETURN_ERR(input_tensor);
  conv_dw->input_ptr_ = (float *)input_tensor->data_;
  NNACL_CHECK_NULL_RETURN_ERR(conv_dw->input_ptr_);

  TensorC *output_tensor = self->out_[OUTPUT_INDEX];
  NNACL_CHECK_NULL_RETURN_ERR(output_tensor);
  conv_dw->output_ptr_ = (float *)output_tensor->data_;
  NNACL_CHECK_NULL_RETURN_ERR(conv_dw->output_ptr_);

  NNACL_CHECK_NULL_RETURN_ERR(conv_dw->dw_param_.num_pixels_);
  NNACL_CHECK_NULL_RETURN_ERR(conv_dw->dw_param_.out_w_start_);
  NNACL_CHECK_NULL_RETURN_ERR(conv_dw->dw_param_.out_w_end_);

  return self->env_->ParallelLaunch(self->env_->thread_pool_, ConvDwRun, self, self->thread_nr_);
}

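/* Resize: re-runs the conv base preparation for the new shapes, caps the thread
 * count at the output height, and rebuilds the per-kernel-width calculation info. */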
int ConvolutionDepthwiseResize(KernelBase *self) {
  ConvolutionDepthwiseStruct *conv_dw = (ConvolutionDepthwiseStruct *)self;
  NNACL_CHECK_NULL_RETURN_ERR(conv_dw);

  int ret = ConvBasePrepare(&conv_dw->conv_);
  if (ret != NNACL_OK) {
    return ret;
  }

  self->thread_nr_ = NNACL_MIN(self->thread_nr_, conv_dw->conv_.compute_.out_h_);
  NNACL_CHECK_ZERO_RETURN_ERR(self->thread_nr_);

  ret = ConvDwInitConvDwCalcInfo(conv_dw);
  if (ret != NNACL_OK) {
    return ret;
  }

  return NNACL_OK;
}

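/* Release: frees the sliding-window buffers and the conv base resources. */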
int ConvolutionDepthwiseRelease(KernelBase *self) {
  ConvolutionDepthwiseStruct *conv_dw = (ConvolutionDepthwiseStruct *)self;
  NNACL_CHECK_NULL_RETURN_ERR(conv_dw);

  ConvDwReleaseParam(conv_dw);

  ConvBaseRelease(&conv_dw->conv_);
  return NNACL_OK;
}

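/* Creates a zero-initialized depthwise convolution kernel and wires its packing and
 * life-cycle callbacks into the KernelBase interface. */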
KernelBase *CreateConvDw(ConvParameter *conv) {
  ConvolutionDepthwiseStruct *conv_dw = (ConvolutionDepthwiseStruct *)malloc(sizeof(ConvolutionDepthwiseStruct));
  NNACL_MALLOC_CHECK_NULL_RETURN_NULL(conv_dw);
  memset(conv_dw, 0, sizeof(ConvolutionDepthwiseStruct));

  conv_dw->conv_.pack_weight_ = ConvDwPackWeight;
  conv_dw->conv_.malloc_weight_bias_ = ConvDwMallocWeightBiasData;
  conv_dw->conv_.base_.Prepare = ConvolutionDepthwisePrepare;
  conv_dw->conv_.base_.Compute = ConvolutionDepthwiseCompute;
  conv_dw->conv_.base_.Resize = ConvolutionDepthwiseResize;
  conv_dw->conv_.base_.Release = ConvolutionDepthwiseRelease;
  return (KernelBase *)conv_dw;
}