• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2023 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "nnacl/kernel/convolution_depthwise.h"
18 #include "nnacl/tensor_c_utils.h"
19 #include "nnacl/base/conv_common_base.h"
20 #include "nnacl/fp32/conv_depthwise_fp32.h"
21 #include "nnacl/fp32/pack_fp32.h"
22 #ifdef ENABLE_AVX512
23 #include "nnacl/intrinsics/ms_simd_cpu_info.h"
24 #endif
25 #include "nnacl/fp32/conv_depthwise_avx_fp32.h"
26 
ConvDwRun(void * cdata,int task_id,float l,float r)27 int ConvDwRun(void *cdata, int task_id, float l, float r) {
28   ConvolutionDepthwiseStruct *conv_dw = (ConvolutionDepthwiseStruct *)cdata;
29   NNACL_CHECK_NULL_RETURN_ERR(conv_dw);
30 
31   ConvParameter *conv_param = (ConvParameter *)conv_dw->conv_.base_.param_;
32   NNACL_CHECK_NULL_RETURN_ERR(conv_param);
33 
34 #ifdef ENABLE_AVX512
35   if (X86_Avx512_Support()) {
36     return ConvDwAVX512(conv_dw->output_ptr_, conv_dw->input_ptr_, (float *)conv_dw->conv_.packed_weight_,
37                         (float *)conv_dw->conv_.bias_data_, conv_param, task_id, &conv_dw->dw_param_);
38   } else {
39     return ConvDwAVX(conv_dw->output_ptr_, conv_dw->input_ptr_, (float *)conv_dw->conv_.packed_weight_,
40                      (float *)conv_dw->conv_.bias_data_, conv_param, task_id, &conv_dw->dw_param_);
41   }
42 #endif
43 
44 #ifdef ENABLE_AVX
45   return ConvDwAVX(conv_dw->output_ptr_, conv_dw->input_ptr_, (float *)conv_dw->conv_.packed_weight_,
46                    (float *)conv_dw->conv_.bias_data_, conv_param, task_id, &conv_dw->dw_param_);
47 #endif
48 
49   return ConvDw(conv_dw->output_ptr_, conv_dw->input_ptr_, (float *)conv_dw->conv_.packed_weight_,
50                 (float *)conv_dw->conv_.bias_data_, conv_param, task_id);
51 }
52 
/*
 * Frees the three arrays stored in conv_dw->dw_param_ (num_pixels_,
 * out_w_start_, out_w_end_) through the kernel's ExecEnv and resets each
 * pointer to NULL. Arrays that are already NULL are skipped.
 *
 * conv_dw - kernel instance; dereferenced without a NULL check.
 */
void ConvDwReleaseParam(ConvolutionDepthwiseStruct *conv_dw) {
  ExecEnv *exec_env = conv_dw->conv_.base_.env_;
  NNACL_CHECK_NULL_RETURN_VOID(exec_env);

  void *buffer = conv_dw->dw_param_.num_pixels_;
  if (buffer != NULL) {
    exec_env->Free(exec_env->allocator_, buffer);
    conv_dw->dw_param_.num_pixels_ = NULL;
  }

  buffer = conv_dw->dw_param_.out_w_start_;
  if (buffer != NULL) {
    exec_env->Free(exec_env->allocator_, buffer);
    conv_dw->dw_param_.out_w_start_ = NULL;
  }

  buffer = conv_dw->dw_param_.out_w_end_;
  if (buffer != NULL) {
    exec_env->Free(exec_env->allocator_, buffer);
    conv_dw->dw_param_.out_w_end_ = NULL;
  }
}
70 
/*
 * Weight-packing callback installed by CreateConvDw.
 *
 * Passes the data of the second input tensor to PackWeightKHWToHWKFp32,
 * writing into conv->packed_weight_ with kernel_hw_ and out_c_ as the sizes.
 * Does nothing further when the tensor has no data.
 */
void ConvDwPackWeight(ConvolutionBaseStruct *conv) {
  TensorC *weight_tensor = conv->base_.in_[SECOND_INPUT];
  void *src = weight_tensor->data_;
  NNACL_CHECK_NULL_RETURN_VOID(src);

  PackWeightKHWToHWKFp32(src, conv->packed_weight_, conv->compute_.kernel_hw_, conv->compute_.out_c_);
}
76 
/*
 * Weight/bias allocation callback installed by CreateConvDw.
 *
 * - Outside a train session, obtains a packed-weight buffer of
 *   kernel_hw_ * out_c_ floats via ConvBaseGetConvPackWeightData.
 * - Allocates the bias buffer (out_c_ floats) through the ExecEnv if it is
 *   not already present, then zero-fills it.
 *
 * Returns NNACL_OK on success, NNACL_ERR when a size computation would
 * overflow, or the error produced by the size/null-check macros.
 */
int ConvDwMallocWeightBiasData(ConvolutionBaseStruct *conv) {
  TensorC *weight_tensor = conv->base_.in_[SECOND_INPUT];
  NNACL_MALLOC_CHECK_NULL_RETURN_ERR(weight_tensor);

  // Check the element count before forming it: the product is computed in
  // signed int, and the byte-size check below cannot detect an overflow that
  // has already happened.
  NNACL_CHECK_INT_MUL_NOT_OVERFLOW(conv->compute_.kernel_hw_, conv->compute_.out_c_, NNACL_ERR);
  int pack_weight_size = conv->compute_.kernel_hw_ * conv->compute_.out_c_;
  NNACL_CHECK_INT_MUL_NOT_OVERFLOW(pack_weight_size, sizeof(float), NNACL_ERR);

  if (!conv->base_.train_session_) {
    NNACL_CHECK_MALLOC_SIZE(pack_weight_size * sizeof(float));
    conv->packed_weight_ = ConvBaseGetConvPackWeightData(conv, pack_weight_size * sizeof(float));
    NNACL_MALLOC_CHECK_NULL_RETURN_ERR(conv->packed_weight_);
  }

  NNACL_CHECK_MALLOC_SIZE(conv->compute_.out_c_ * sizeof(float));
  if (conv->bias_data_ == NULL) {
    conv->bias_data_ = conv->base_.env_->Alloc(conv->base_.env_->allocator_, conv->compute_.out_c_ * sizeof(float));
    NNACL_MALLOC_CHECK_NULL_RETURN_ERR(conv->bias_data_);
  }
  // The bias buffer is always cleared, including when it already existed.
  memset(conv->bias_data_, 0, conv->compute_.out_c_ * sizeof(float));
  return NNACL_OK;
}
98 
/*
 * Rebuilds the per-kernel-column tables in conv_dw->dw_param_.
 *
 * Any previous tables are released first. For every kw in [0, kernel_w_) the
 * function stores:
 *   out_w_start_[kw] = max(0, (pad_l_ - dilation_w_ * kw + stride_w_ - 1) / stride_w_)
 *   out_w_end_[kw]   = min((in_w_ + pad_l_ - dilation_w_ * kw + stride_w_ - 1) / stride_w_, out_w_)
 *   num_pixels_[kw]  = out_w_end_[kw] - out_w_start_[kw]
 * first_calc_kw_ is set to the first kw whose range starts at 0 and spans
 * out_w_ pixels, or left at -1 when no kw qualifies.
 *
 * Returns NNACL_OK on success, NNACL_ERR for a zero stride_w_ or an
 * overflowing dilation_w_ * (kernel_w_ - 1), or the error produced by the
 * null-check macros.
 */
int ConvDwInitConvDwCalcInfo(ConvolutionDepthwiseStruct *conv_dw) {
  ExecEnv *env = conv_dw->conv_.base_.env_;
  NNACL_CHECK_NULL_RETURN_ERR(env);
  // Address of an embedded member: it cannot be NULL, so no check is needed.
  ConvComputeParam *compute = &conv_dw->conv_.compute_;

  ConvDwReleaseParam(conv_dw);

  // Validate before allocating so a rejected configuration leaves the tables
  // NULL rather than allocated but uninitialised.
  if (compute->stride_w_ == 0) {
    // stride_w_ is the divisor in both formulas below.
    return NNACL_ERR;
  }
  NNACL_CHECK_INT_MUL_NOT_OVERFLOW(compute->dilation_w_, (compute->kernel_w_ - 1), NNACL_ERR);

  conv_dw->dw_param_.num_pixels_ = env->Alloc(env->allocator_, compute->kernel_w_ * sizeof(int));
  NNACL_MALLOC_CHECK_NULL_RETURN_ERR(conv_dw->dw_param_.num_pixels_);

  conv_dw->dw_param_.out_w_start_ = env->Alloc(env->allocator_, compute->kernel_w_ * sizeof(int));
  NNACL_MALLOC_CHECK_NULL_RETURN_ERR(conv_dw->dw_param_.out_w_start_);

  conv_dw->dw_param_.out_w_end_ = env->Alloc(env->allocator_, compute->kernel_w_ * sizeof(int));
  NNACL_MALLOC_CHECK_NULL_RETURN_ERR(conv_dw->dw_param_.out_w_end_);

  int *num_pixels = (int *)(conv_dw->dw_param_.num_pixels_);
  int *out_w_start = (int *)(conv_dw->dw_param_.out_w_start_);
  int *out_w_end = (int *)(conv_dw->dw_param_.out_w_end_);
  conv_dw->dw_param_.first_calc_kw_ = -1;

  for (int kw = 0; kw < compute->kernel_w_; kw++) {
    out_w_start[kw] =
      NNACL_MAX(0, (compute->pad_l_ - compute->dilation_w_ * kw + compute->stride_w_ - 1) / compute->stride_w_);

    out_w_end[kw] = NNACL_MIN(
      (compute->in_w_ + compute->pad_l_ - compute->dilation_w_ * kw + compute->stride_w_ - 1) / compute->stride_w_,
      compute->out_w_);

    num_pixels[kw] = out_w_end[kw] - out_w_start[kw];
    // Remember only the first kw that covers the whole output row.
    if (conv_dw->dw_param_.first_calc_kw_ == -1 && out_w_start[kw] == 0 && num_pixels[kw] == compute->out_w_) {
      conv_dw->dw_param_.first_calc_kw_ = kw;
    }
  }
  return NNACL_OK;
}
136 
/*
 * Prepare callback of the depthwise convolution kernel.
 *
 * Requires at least two input tensors and one output tensor, refreshes the
 * origin weight/bias through ConvBaseUpdateOriginWeightAndBias and, in a
 * train session, sets self->work_size_ to
 * batch * height * width * sizeof(float) of the 4-D weight tensor.
 *
 * Returns the result of ConvBaseInitConvWeightBias on success; otherwise
 * NNACL_INPUT_TENSOR_ERROR / NNACL_OUTPUT_TENSOR_ERROR for a bad tensor
 * count, NNACL_CONVOLUTION_WEIGHT_SHAPE_INVALID for a non-4-D weight,
 * NNACL_ERR on size overflow, or the error produced by the null-check macro.
 */
int ConvolutionDepthwisePrepare(KernelBase *self) {
  // self must be validated before its fields are read.
  NNACL_CHECK_NULL_RETURN_ERR(self);
  NNACL_CHECK_FALSE(self->in_size_ < TWO_TENSOR, NNACL_INPUT_TENSOR_ERROR);
  NNACL_CHECK_FALSE(self->out_size_ < ONE_TENSOR, NNACL_OUTPUT_TENSOR_ERROR);

  ConvolutionDepthwiseStruct *conv_dw = (ConvolutionDepthwiseStruct *)self;

  ConvBaseUpdateOriginWeightAndBias(&conv_dw->conv_);

  if (self->train_session_) {
    TensorC *weight_tensor = self->in_[SECOND_INPUT];
    NNACL_CHECK_NULL_RETURN_ERR(weight_tensor);
    NNACL_CHECK_TRUE_RET(weight_tensor->shape_size_ == DIMENSION_4D, NNACL_CONVOLUTION_WEIGHT_SHAPE_INVALID);

    // Each signed product is checked before it is formed.
    NNACL_CHECK_INT_MUL_NOT_OVERFLOW(GetHeight(weight_tensor), GetWidth(weight_tensor), NNACL_ERR);
    int weight_size_hw = GetHeight(weight_tensor) * GetWidth(weight_tensor);
    NNACL_CHECK_INT_MUL_NOT_OVERFLOW(GetBatch(weight_tensor), weight_size_hw, NNACL_ERR);
    int pack_weight_size = GetBatch(weight_tensor) * weight_size_hw;
    NNACL_CHECK_INT_MUL_NOT_OVERFLOW(pack_weight_size, sizeof(float), NNACL_ERR);
    self->work_size_ = pack_weight_size * sizeof(float);
  }

  return ConvBaseInitConvWeightBias(&conv_dw->conv_);
}
160 
/*
 * Compute callback of the depthwise convolution kernel.
 *
 * Repacks the weight via ConvBaseRepackWeight, caches the first input's and
 * the output's data pointers as float * in the kernel struct, verifies that
 * the dw_param_ tables exist, and then launches ConvDwRun on thread_nr_ tasks
 * through the ExecEnv's ParallelLaunch.
 *
 * Returns the ParallelLaunch result, the ConvBaseRepackWeight error, or the
 * error produced by the null-check macro.
 */
int ConvolutionDepthwiseCompute(KernelBase *self) {
  ConvolutionDepthwiseStruct *dw = (ConvolutionDepthwiseStruct *)self;
  NNACL_CHECK_NULL_RETURN_ERR(dw);

  int status = ConvBaseRepackWeight(&dw->conv_);
  if (status != NNACL_OK) {
    return status;
  }

  // Input side: tensor first, then its data buffer.
  TensorC *in_tensor = self->in_[FIRST_INPUT];
  NNACL_CHECK_NULL_RETURN_ERR(in_tensor);
  dw->input_ptr_ = (float *)in_tensor->data_;
  NNACL_CHECK_NULL_RETURN_ERR(dw->input_ptr_);

  // Output side, same pattern.
  TensorC *out_tensor = self->out_[OUTPUT_INDEX];
  NNACL_CHECK_NULL_RETURN_ERR(out_tensor);
  dw->output_ptr_ = (float *)out_tensor->data_;
  NNACL_CHECK_NULL_RETURN_ERR(dw->output_ptr_);

  // These tables are allocated in ConvDwInitConvDwCalcInfo.
  NNACL_CHECK_NULL_RETURN_ERR(dw->dw_param_.num_pixels_);
  NNACL_CHECK_NULL_RETURN_ERR(dw->dw_param_.out_w_start_);
  NNACL_CHECK_NULL_RETURN_ERR(dw->dw_param_.out_w_end_);

  return self->env_->ParallelLaunch(self->env_->thread_pool_, ConvDwRun, self, self->thread_nr_);
}
186 
/*
 * Resize callback of the depthwise convolution kernel.
 *
 * Runs ConvBasePrepare, caps self->thread_nr_ at the output height out_h_,
 * rejects a zero thread count, and rebuilds the dw_param_ tables with
 * ConvDwInitConvDwCalcInfo.
 *
 * Returns NNACL_OK on success or the first error encountered.
 */
int ConvolutionDepthwiseResize(KernelBase *self) {
  ConvolutionDepthwiseStruct *dw = (ConvolutionDepthwiseStruct *)self;
  NNACL_CHECK_NULL_RETURN_ERR(dw);

  int status = ConvBasePrepare(&dw->conv_);
  if (status != NNACL_OK) {
    return status;
  }

  self->thread_nr_ = NNACL_MIN(self->thread_nr_, dw->conv_.compute_.out_h_);
  NNACL_CHECK_ZERO_RETURN_ERR(self->thread_nr_);

  status = ConvDwInitConvDwCalcInfo(dw);
  return (status != NNACL_OK) ? status : NNACL_OK;
}
206 
/*
 * Release callback of the depthwise convolution kernel.
 *
 * Frees the dw_param_ tables with ConvDwReleaseParam, then calls
 * ConvBaseRelease on the embedded convolution base.
 *
 * Returns NNACL_OK, or the error produced by the null-check macro when self
 * is NULL.
 */
int ConvolutionDepthwiseRelease(KernelBase *self) {
  ConvolutionDepthwiseStruct *dw = (ConvolutionDepthwiseStruct *)self;
  NNACL_CHECK_NULL_RETURN_ERR(dw);

  // Depthwise-specific tables go first, then the shared base.
  ConvDwReleaseParam(dw);
  ConvBaseRelease(&dw->conv_);

  return NNACL_OK;
}
216 
CreateConvDw(ConvParameter * conv)217 KernelBase *CreateConvDw(ConvParameter *conv) {
218   ConvolutionDepthwiseStruct *conv_dw = (ConvolutionDepthwiseStruct *)malloc(sizeof(ConvolutionDepthwiseStruct));
219   NNACL_MALLOC_CHECK_NULL_RETURN_NULL(conv_dw);
220   memset(conv_dw, 0, sizeof(ConvolutionDepthwiseStruct));
221 
222   conv_dw->conv_.pack_weight_ = ConvDwPackWeight;
223   conv_dw->conv_.malloc_weight_bias_ = ConvDwMallocWeightBiasData;
224   conv_dw->conv_.base_.Prepare = ConvolutionDepthwisePrepare;
225   conv_dw->conv_.base_.Compute = ConvolutionDepthwiseCompute;
226   conv_dw->conv_.base_.Resize = ConvolutionDepthwiseResize;
227   conv_dw->conv_.base_.Release = ConvolutionDepthwiseRelease;
228   return (KernelBase *)conv_dw;
229 }
230