• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef MINDSPORE_NNACL_FP32_CONV_DEPTHWISE_H_
18 #define MINDSPORE_NNACL_FP32_CONV_DEPTHWISE_H_
19 
20 #include "nnacl/conv_parameter.h"
21 #include "nnacl/base/conv_common_base.h"
22 
23 #ifdef __cplusplus
24 extern "C" {
25 #endif
26 
27 #ifndef ENABLE_ARM64
// Prototype is visible only when ENABLE_ARM64 is NOT defined (see the enclosing #ifndef).
// dst is writable; src, weight and bias are read-only float buffers. The int arguments carry extents
// (height, width, kernel_h, kernel_w), block_channel and several *_step values.
// NOTE(review): presumably handles the non-border ("center") region of a depthwise convolution, and ARM64
// builds obtain the symbol elsewhere — confirm against the definition and build files.
// NOTE(review): whether the *_step values are in elements or bytes is not visible here; is_relu / is_relu6
// look like fused-activation selectors — verify.
28 void DepthwiseCenter(float *dst, const float *src, const float *weight, const float *bias, int height, int width,
29                      int kernel_h, int kernel_w, int out_h_step, int block_channel, int in_sh_step, int in_sw_step,
30                      int in_kh_step, int in_kw_step, bool is_relu, bool is_relu6);
31 #endif
32 
// Declared unconditionally. Writes to output_data; input_data, weight_data, bias_data and conv_param are
// read-only. Returns int.
// NOTE(review): by its name this is the generic fp32 depthwise convolution; the return value is presumably an
// nnacl status code and task_id presumably selects the caller's slice of work — confirm in the definition.
33 int ConvDw(float *output_data, const float *input_data, const float *weight_data, const float *bias_data,
34            const ConvParameter *conv_param, int task_id);
35 
// Same leading parameters as ConvDw plus a non-const ConvDwCalcParam pointer. Returns int.
// Note that this prototype is NOT wrapped in any ENABLE_* guard in this header.
// NOTE(review): name suggests an AVX-512 code path; conv_dw_calc_param_ is non-const, so the callee may write
// through it — confirm. Return value is presumably a status code.
36 int ConvDwAVX512(float *output_data, const float *input_data, const float *weight_data, const float *bias_data,
37                  const ConvParameter *conv_param, int task_id, ConvDwCalcParam *conv_dw_calc_param_);
38 
// Declared unconditionally. output_ptr is writable; input_ptr, weight_ptr and bias are read-only.
// NOTE(review): presumably processes num_pixels output positions of one row; first_calc_flag presumably
// distinguishes the first accumulation pass (e.g. initialise vs. accumulate) — verify against the definition.
// NOTE(review): unit of input_step (elements vs. bytes) is not visible here.
39 void ConvDwAVX512Fp32Row(float *output_ptr, const float *input_ptr, const float *weight_ptr, size_t num_pixels,
40                          size_t output_channel, size_t input_step, bool first_calc_flag, const float *bias);
41 
// Takes a writable SlidingWindowParam and a read-only ConvParameter; returns nothing.
// NOTE(review): presumably fills *sliding from conv_param, with `block` being the channel block size — confirm.
42 void InitSlidingParam(SlidingWindowParam *sliding, const ConvParameter *conv_param, int block);
43 
// Variant taking separate input_block and weight_block values. *sliding is writable, conv_param is read-only.
// NOTE(review): presumably the sliding-window setup for regular (non-depthwise) convolution — confirm.
44 void InitSlidingParamConv(SlidingWindowParam *sliding, const ConvParameter *conv_param, int input_block,
45                           int weight_block);
46 
// Same parameter types as InitSlidingParamConv; the third parameter is named in_block here rather than
// input_block.
// NOTE(review): "Append" suggests it adds derived fields to an already initialised *sliding — confirm.
47 void AppendSlidingParamConv(SlidingWindowParam *sliding, const ConvParameter *conv_param, int in_block,
48                             int weight_block);
49 
// Same signature shape as InitSlidingParam: writable *sliding, read-only conv_param, single int block.
// NOTE(review): presumably the depthwise-specific sliding-window setup — confirm in the definition.
50 void InitSlidingParamConvDw(SlidingWindowParam *sliding, const ConvParameter *conv_param, int block);
51 
// Same signature as InitSlidingParamConvDw.
// NOTE(review): presumably computes additional depthwise step fields on an already initialised *sliding — confirm.
52 void AppendSlidingParamConvDw(SlidingWindowParam *sliding, const ConvParameter *conv_param, int block);
53 
// Writes to output_data; every other pointer argument, including *sliding, is read-only. Returns nothing.
// NOTE(review): "SW" presumably stands for sliding-window; the caller is presumably expected to have prepared
// *sliding with the Init/Append helpers declared in this header — confirm.
54 void ConvDwSWFp32(float *output_data, const float *input_data, const float *weight_data, const float *bias_data,
55                   const ConvParameter *conv_param, const SlidingWindowParam *sliding, int task_id);
56 
// Predicate over a read-only ConvParameter; returns bool.
// NOTE(review): presumably true when the dedicated 3x3 depthwise path is applicable — exact criteria are in the
// definition, not visible here.
57 bool CheckConvDwUse3X3(const ConvParameter *conv_param);
58 
// Predicate over a read-only ConvParameter; returns bool.
// NOTE(review): presumably true when the indirect-buffer path (ConvDwInitIndirection / ConvDwIndirection)
// should be used — confirm criteria in the definition.
59 bool CheckConvDwUseIndirectBuffer(const ConvParameter *conv_param);
60 
// Takes a table of float pointers (indirect_buffer), non-const src and zero_ptr, and a read-only conv_param.
// NOTE(review): presumably fills indirect_buffer with pointers into src, substituting zero_ptr for padded
// positions; step_h / step_w are presumably table strides — confirm against the definition.
// NOTE(review): src is declared non-const; whether it is actually written is not visible here.
61 void ConvDwInitIndirection(float **indirect_buffer, float *src, float *zero_ptr, const ConvParameter *conv_param,
62                            int step_h, int step_w);
63 
64 #ifdef ENABLE_ARM64
// Visible only when ENABLE_ARM64 is defined (see the enclosing #ifdef).
// channels and output_width are int here, while input_stride, relu and relu6 are size_t.
// NOTE(review): presumably a 3x3 depthwise kernel reading its inputs through the pointer table `input`;
// relu / relu6 are presumably non-zero to enable the activation — confirm.
65 void ConvDwFp32Indirect3x3(float *output, float **input, const float *weights, const float *bias, int channels,
66                            int output_width, size_t input_stride, size_t relu, size_t relu6);
67 
// Visible only when ENABLE_ARM64 is defined. Signature is identical to ConvDwFp32Indirect3x3.
// NOTE(review): presumably the 5x5 counterpart of the 3x3 indirect kernel — confirm.
68 void ConvDwFp32Indirect5x5(float *output, float **input, const float *weights, const float *bias, int channels,
69                            int output_width, size_t input_stride, size_t relu, size_t relu6);
70 #endif
71 
72 #ifdef ENABLE_AVX
// Function-pointer type (ENABLE_AVX builds only): writable dst, read-only src/weight/bias, then ten size_t
// arguments. It is the type of the `kernel` argument of DepthwiseBorderAvxFp32, and the DepthwiseSW*Kernel
// prototypes in this header have the same parameter list.
// NOTE(review): `oc_algin` is presumably a misspelling of "oc_align"; spelling kept as in the project.
// NOTE(review): meaning and units of act_flag, the *_step values and kw_remainder are not visible here.
73 typedef void (*DepthwiseSWKernel)(float *dst, const float *src, const float *weight, const float *bias, size_t kernel_h,
74                                   size_t kernel_w, size_t act_flag, size_t ow_block, size_t oc_block, size_t oc_algin,
75                                   size_t in_kw_step, size_t in_kh_step, size_t in_sw_step, size_t kw_remainder);
76 
// ENABLE_AVX only. Parameter list matches the DepthwiseSWKernel function-pointer type.
// NOTE(review): the "3x32" suffix presumably encodes the tile handled per call (3 output columns x 32 channels)
// — confirm against the implementation.
77 void DepthwiseSW3x32Kernel(float *dst, const float *src, const float *weight, const float *bias, size_t kernel_h,
78                            size_t kernel_w, size_t act_flag, size_t ow_block, size_t oc_block, size_t oc_algin,
79                            size_t in_kw_step, size_t in_kh_step, size_t in_sw_step, size_t kw_remainder);
80 
// ENABLE_AVX only. Parameter list matches the DepthwiseSWKernel function-pointer type.
// NOTE(review): "1x32" presumably means 1 output column x 32 channels per call — confirm.
81 void DepthwiseSW1x32Kernel(float *dst, const float *src, const float *weight, const float *bias, size_t kernel_h,
82                            size_t kernel_w, size_t act_flag, size_t ow_block, size_t oc_block, size_t oc_algin,
83                            size_t in_kw_step, size_t in_kh_step, size_t in_sw_step, size_t kw_remainder);
84 
// ENABLE_AVX only. Parameter list matches the DepthwiseSWKernel function-pointer type.
// NOTE(review): "4x24" presumably means 4 output columns x 24 channels per call — confirm.
85 void DepthwiseSW4x24Kernel(float *dst, const float *src, const float *weight, const float *bias, size_t kernel_h,
86                            size_t kernel_w, size_t act_flag, size_t ow_block, size_t oc_block, size_t oc_algin,
87                            size_t in_kw_step, size_t in_kh_step, size_t in_sw_step, size_t kw_remainder);
88 
// ENABLE_AVX only. Parameter list matches the DepthwiseSWKernel function-pointer type.
// NOTE(review): "1x24" presumably means 1 output column x 24 channels per call — confirm.
89 void DepthwiseSW1x24Kernel(float *dst, const float *src, const float *weight, const float *bias, size_t kernel_h,
90                            size_t kernel_w, size_t act_flag, size_t ow_block, size_t oc_block, size_t oc_algin,
91                            size_t in_kw_step, size_t in_kh_step, size_t in_sw_step, size_t kw_remainder);
92 
// ENABLE_AVX only. Parameter list matches the DepthwiseSWKernel function-pointer type.
// NOTE(review): "4x16" presumably means 4 output columns x 16 channels per call — confirm.
93 void DepthwiseSW4x16Kernel(float *dst, const float *src, const float *weight, const float *bias, size_t kernel_h,
94                            size_t kernel_w, size_t act_flag, size_t ow_block, size_t oc_block, size_t oc_algin,
95                            size_t in_kw_step, size_t in_kh_step, size_t in_sw_step, size_t kw_remainder);
96 
// ENABLE_AVX only. Parameter list matches the DepthwiseSWKernel function-pointer type.
// NOTE(review): "1x16" presumably means 1 output column x 16 channels per call — confirm.
97 void DepthwiseSW1x16Kernel(float *dst, const float *src, const float *weight, const float *bias, size_t kernel_h,
98                            size_t kernel_w, size_t act_flag, size_t ow_block, size_t oc_block, size_t oc_algin,
99                            size_t in_kw_step, size_t in_kh_step, size_t in_sw_step, size_t kw_remainder);
100 
// ENABLE_AVX only. Parameter list matches the DepthwiseSWKernel function-pointer type.
// NOTE(review): "8x8" presumably means 8 output columns x 8 channels per call — confirm.
101 void DepthwiseSW8x8Kernel(float *dst, const float *src, const float *weight, const float *bias, size_t kernel_h,
102                           size_t kernel_w, size_t act_flag, size_t ow_block, size_t oc_block, size_t oc_algin,
103                           size_t in_kw_step, size_t in_kh_step, size_t in_sw_step, size_t kw_remainder);
104 
// ENABLE_AVX only. Parameter list matches the DepthwiseSWKernel function-pointer type.
// NOTE(review): "1x8" presumably means 1 output column x 8 channels per call — confirm.
105 void DepthwiseSW1x8Kernel(float *dst, const float *src, const float *weight, const float *bias, size_t kernel_h,
106                           size_t kernel_w, size_t act_flag, size_t ow_block, size_t oc_block, size_t oc_algin,
107                           size_t in_kw_step, size_t in_kh_step, size_t in_sw_step, size_t kw_remainder);
108 
// ENABLE_AVX only. Same parameter list as ConvDwSWFp32: writable output_data, all other pointers read-only.
// NOTE(review): presumably the AVX sliding-window depthwise driver that dispatches to the DepthwiseSW*Kernel
// routines — confirm in the definition.
109 void DepthwiseSWAvxFp32(float *output_data, const float *input_data, const float *weight_data, const float *bias_data,
110                         const ConvParameter *conv_param, const SlidingWindowParam *sliding, int task_id);
111 
// ENABLE_AVX only. Receives the kernel to run as a DepthwiseSWKernel function pointer, together with read-only
// conv_param / sw_param and int top/left/right bounds.
// NOTE(review): presumably processes the border (padding-affected) output region bounded by top/left/right
// — confirm against the definition.
// NOTE(review): `ow_bock` looks like a misspelling of "ow_block"; left unchanged because the definition is not
// visible from this header.
112 void DepthwiseBorderAvxFp32(float *dst, const float *src, const float *weight, const float *bias, int top, int left,
113                             int right, const ConvParameter *conv_param, const SlidingWindowParam *sw_param,
114                             const DepthwiseSWKernel kernel, int act_type, int ow_bock, int oc_block);
115 
// ENABLE_AVX only. Unlike the ARM64 ConvDwFp32Indirect3x3 prototype, channels and output_width are size_t here.
// NOTE(review): presumably the AVX 3x3 depthwise kernel reading inputs through the pointer table `input`;
// relu / relu6 are presumably non-zero to enable the activation — confirm.
116 void ConvDwFp32Avx3x3(float *output, float **input, const float *weights, const float *bias, size_t channels,
117                       size_t output_width, size_t input_stride, size_t relu, size_t relu6);
118 
// ENABLE_AVX only. Signature is identical to ConvDwFp32Avx3x3.
// NOTE(review): presumably the 5x5 counterpart of the AVX 3x3 indirect kernel — confirm.
119 void ConvDwFp32Avx5x5(float *output, float **input, const float *weights, const float *bias, size_t channels,
120                       size_t output_width, size_t input_stride, size_t relu, size_t relu6);
121 #ifdef ENABLE_DEBUG
// Visible only when both ENABLE_AVX and ENABLE_DEBUG are defined. Parameter list matches DepthwiseSWKernel.
// NOTE(review): presumably a generic (any ow_block x oc_block) reference kernel used for debugging the
// fixed-size DepthwiseSW*Kernel variants — confirm.
122 void DepthwiseSWWxKKernel(float *dst, const float *src, const float *weight, const float *bias, size_t kernel_h,
123                           size_t kernel_w, size_t act_flag, size_t ow_block, size_t oc_block, size_t oc_algin,
124                           size_t in_kw_step, size_t in_kh_step, size_t in_sw_step, size_t kw_remainder);
125 #endif
126 #endif
127 
128 #if defined(ENABLE_ARM) || (defined(ENABLE_SSE) && !defined(ENABLE_AVX))
// Visible when ENABLE_ARM is defined, or when ENABLE_SSE is defined without ENABLE_AVX.
// Takes a writable dst, a table of float pointers (lines), and read-only weight / bias_data.
// NOTE(review): presumably computes one output row of a 3x3 depthwise convolution from the input rows in
// `lines`; ori_channel is presumably the unpadded channel count — confirm.
129 void ConvDw3x3Line(float *dst, float **lines, const float *weight, const float *bias_data, int width, int ori_channel,
130                    bool relu, bool relu6);
// Visible when ENABLE_ARM is defined, or when ENABLE_SSE is defined without ENABLE_AVX.
// output_data and buffer are writable; input_data, weight_data, bias_data and conv_param are read-only.
// NOTE(review): presumably processes output rows start_oh..end_oh using `buffer` as scratch space; whether
// end_oh is inclusive or exclusive, and the required buffer size, are not visible here.
131 void ConvDw3x3(float *output_data, float *buffer, const float *input_data, const float *weight_data,
132                const float *bias_data, const ConvParameter *conv_param, int start_oh, int end_oh);
133 #endif
134 
// Declared unconditionally. Unlike the ISA-specific indirect kernels in this header, input_stride is int and
// relu / relu6 are bool; an extra int `kernel` argument is taken.
// NOTE(review): presumably the portable row kernel for the indirect-buffer path, with `kernel` being the number
// of kernel taps (kernel_h * kernel_w) — confirm against the definition.
135 void ConvDwFp32IndirectRow(float *output, float **input, const float *weights, const float *bias, int channels,
136                            int output_width, int input_stride, bool relu, bool relu6, int kernel);
137 
// Declared unconditionally. Writes to output_data; takes the pointer table indirect_buffer, read-only
// weight_data / bias_data / conv_param, and a non-const zero_ptr.
// NOTE(review): presumably the driver for the indirect-buffer depthwise path, expecting indirect_buffer to have
// been prepared by ConvDwInitIndirection — confirm.
138 void ConvDwIndirection(float *output_data, float **indirect_buffer, const float *weight_data, const float *bias_data,
139                        float *zero_ptr, const ConvParameter *conv_param, int task_id);
140 
// Declared unconditionally. Same parameter list as ConvDwSWFp32: writable output_data, all other pointers
// read-only.
// NOTE(review): by its name, the sliding-window depthwise DEconvolution (transposed convolution) counterpart
// — confirm in the definition.
141 void DeconvDwSWFp32(float *output_data, const float *input_data, const float *weight_data, const float *bias_data,
142                     const ConvParameter *conv_param, const SlidingWindowParam *sliding, int task_id);
143 
144 #ifdef __cplusplus
145 }
146 #endif
147 
148 #endif  // MINDSPORE_NNACL_FP32_CONV_DEPTHWISE_H_
149