1 /** 2 * Copyright 2020 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef MINDSPORE_NNACL_FP32_CONV_DEPTHWISE_H_ 18 #define MINDSPORE_NNACL_FP32_CONV_DEPTHWISE_H_ 19 20 #include "nnacl/conv_parameter.h" 21 #include "nnacl/base/conv_common_base.h" 22 23 #ifdef __cplusplus 24 extern "C" { 25 #endif 26 27 #ifndef ENABLE_ARM64 28 void DepthwiseCenter(float *dst, const float *src, const float *weight, const float *bias, int height, int width, 29 int kernel_h, int kernel_w, int out_h_step, int block_channel, int in_sh_step, int in_sw_step, 30 int in_kh_step, int in_kw_step, bool is_relu, bool is_relu6); 31 #endif 32 33 int ConvDw(float *output_data, const float *input_data, const float *weight_data, const float *bias_data, 34 const ConvParameter *conv_param, int task_id); 35 36 int ConvDwAVX512(float *output_data, const float *input_data, const float *weight_data, const float *bias_data, 37 const ConvParameter *conv_param, int task_id, ConvDwCalcParam *conv_dw_calc_param_); 38 39 void ConvDwAVX512Fp32Row(float *output_ptr, const float *input_ptr, const float *weight_ptr, size_t num_pixels, 40 size_t output_channel, size_t input_step, bool first_calc_flag, const float *bias); 41 42 void InitSlidingParam(SlidingWindowParam *sliding, const ConvParameter *conv_param, int block); 43 44 void InitSlidingParamConv(SlidingWindowParam *sliding, const ConvParameter *conv_param, int input_block, 45 int weight_block); 46 47 void AppendSlidingParamConv(SlidingWindowParam *sliding, const ConvParameter *conv_param, int in_block, 48 int weight_block); 49 50 void InitSlidingParamConvDw(SlidingWindowParam *sliding, const ConvParameter *conv_param, int block); 51 52 void AppendSlidingParamConvDw(SlidingWindowParam *sliding, const ConvParameter *conv_param, int block); 53 54 void ConvDwSWFp32(float *output_data, const float *input_data, const float *weight_data, const float *bias_data, 55 const ConvParameter *conv_param, const SlidingWindowParam *sliding, int task_id); 56 57 bool CheckConvDwUse3X3(const ConvParameter *conv_param); 58 59 bool CheckConvDwUseIndirectBuffer(const ConvParameter *conv_param); 60 61 void ConvDwInitIndirection(float **indirect_buffer, float *src, float *zero_ptr, const ConvParameter *conv_param, 62 int step_h, int step_w); 63 64 #ifdef ENABLE_ARM64 65 void ConvDwFp32Indirect3x3(float *output, float **input, const float *weights, const float *bias, int channels, 66 int output_width, size_t input_stride, size_t relu, size_t relu6); 67 68 void ConvDwFp32Indirect5x5(float *output, float **input, const float *weights, const float *bias, int channels, 69 int output_width, size_t input_stride, size_t relu, size_t relu6); 70 #endif 71 72 #ifdef ENABLE_AVX 73 typedef void (*DepthwiseSWKernel)(float *dst, const float *src, const float *weight, const float *bias, size_t kernel_h, 74 size_t kernel_w, size_t act_flag, size_t ow_block, size_t oc_block, size_t oc_algin, 75 size_t in_kw_step, size_t in_kh_step, size_t in_sw_step, size_t kw_remainder); 76 77 void DepthwiseSW3x32Kernel(float *dst, const float *src, const float *weight, const float *bias, size_t kernel_h, 78 size_t kernel_w, size_t act_flag, size_t ow_block, size_t oc_block, size_t oc_algin, 79 size_t in_kw_step, size_t in_kh_step, size_t in_sw_step, size_t kw_remainder); 80 81 void DepthwiseSW1x32Kernel(float *dst, const float *src, const float *weight, const float *bias, size_t kernel_h, 82 size_t kernel_w, size_t act_flag, size_t ow_block, size_t oc_block, size_t oc_algin, 83 size_t in_kw_step, size_t in_kh_step, size_t in_sw_step, size_t kw_remainder); 84 85 void DepthwiseSW4x24Kernel(float *dst, const float *src, const float *weight, const float *bias, size_t kernel_h, 86 size_t kernel_w, size_t act_flag, size_t ow_block, size_t oc_block, size_t oc_algin, 87 size_t in_kw_step, size_t in_kh_step, size_t in_sw_step, size_t kw_remainder); 88 89 void DepthwiseSW1x24Kernel(float *dst, const float *src, const float *weight, const float *bias, size_t kernel_h, 90 size_t kernel_w, size_t act_flag, size_t ow_block, size_t oc_block, size_t oc_algin, 91 size_t in_kw_step, size_t in_kh_step, size_t in_sw_step, size_t kw_remainder); 92 93 void DepthwiseSW4x16Kernel(float *dst, const float *src, const float *weight, const float *bias, size_t kernel_h, 94 size_t kernel_w, size_t act_flag, size_t ow_block, size_t oc_block, size_t oc_algin, 95 size_t in_kw_step, size_t in_kh_step, size_t in_sw_step, size_t kw_remainder); 96 97 void DepthwiseSW1x16Kernel(float *dst, const float *src, const float *weight, const float *bias, size_t kernel_h, 98 size_t kernel_w, size_t act_flag, size_t ow_block, size_t oc_block, size_t oc_algin, 99 size_t in_kw_step, size_t in_kh_step, size_t in_sw_step, size_t kw_remainder); 100 101 void DepthwiseSW8x8Kernel(float *dst, const float *src, const float *weight, const float *bias, size_t kernel_h, 102 size_t kernel_w, size_t act_flag, size_t ow_block, size_t oc_block, size_t oc_algin, 103 size_t in_kw_step, size_t in_kh_step, size_t in_sw_step, size_t kw_remainder); 104 105 void DepthwiseSW1x8Kernel(float *dst, const float *src, const float *weight, const float *bias, size_t kernel_h, 106 size_t kernel_w, size_t act_flag, size_t ow_block, size_t oc_block, size_t oc_algin, 107 size_t in_kw_step, size_t in_kh_step, size_t in_sw_step, size_t kw_remainder); 108 109 void DepthwiseSWAvxFp32(float *output_data, const float *input_data, const float *weight_data, const float *bias_data, 110 const ConvParameter *conv_param, const SlidingWindowParam *sliding, int task_id); 111 112 void DepthwiseBorderAvxFp32(float *dst, const float *src, const float *weight, const float *bias, int top, int left, 113 int right, const ConvParameter *conv_param, const SlidingWindowParam *sw_param, 114 const DepthwiseSWKernel kernel, int act_type, int ow_bock, int oc_block); 115 116 void ConvDwFp32Avx3x3(float *output, float **input, const float *weights, const float *bias, size_t channels, 117 size_t output_width, size_t input_stride, size_t relu, size_t relu6); 118 119 void ConvDwFp32Avx5x5(float *output, float **input, const float *weights, const float *bias, size_t channels, 120 size_t output_width, size_t input_stride, size_t relu, size_t relu6); 121 #ifdef ENABLE_DEBUG 122 void DepthwiseSWWxKKernel(float *dst, const float *src, const float *weight, const float *bias, size_t kernel_h, 123 size_t kernel_w, size_t act_flag, size_t ow_block, size_t oc_block, size_t oc_algin, 124 size_t in_kw_step, size_t in_kh_step, size_t in_sw_step, size_t kw_remainder); 125 #endif 126 #endif 127 128 #if defined(ENABLE_ARM) || (defined(ENABLE_SSE) && !defined(ENABLE_AVX)) 129 void ConvDw3x3Line(float *dst, float **lines, const float *weight, const float *bias_data, int width, int ori_channel, 130 bool relu, bool relu6); 131 void ConvDw3x3(float *output_data, float *buffer, const float *input_data, const float *weight_data, 132 const float *bias_data, const ConvParameter *conv_param, int start_oh, int end_oh); 133 #endif 134 135 void ConvDwFp32IndirectRow(float *output, float **input, const float *weights, const float *bias, int channels, 136 int output_width, int input_stride, bool relu, bool relu6, int kernel); 137 138 void ConvDwIndirection(float *output_data, float **indirect_buffer, const float *weight_data, const float *bias_data, 139 float *zero_ptr, const ConvParameter *conv_param, int task_id); 140 141 void DeconvDwSWFp32(float *output_data, const float *input_data, const float *weight_data, const float *bias_data, 142 const ConvParameter *conv_param, const SlidingWindowParam *sliding, int task_id); 143 144 #ifdef __cplusplus 145 } 146 #endif 147 148 #endif // MINDSPORE_NNACL_FP32_CONV_DEPTHWISE_H_ 149