/*
 * Copyright (c) 2021-2022 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef ARM_COMPUTE_CPU_GEMM_CONV2D_H
#define ARM_COMPUTE_CPU_GEMM_CONV2D_H

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "src/cpu/ICpuOperator.h"

#include <memory>

namespace arm_compute
{
namespace cpu
{
class CpuGemm;
class CpuGemmLowpMatrixMultiplyCore;
class CpuGemmLowpOutputStage;
namespace kernels
{
class CpuWeightsReshapeKernel;
class CpuIm2ColKernel;
class CpuCol2ImKernel;
class CpuReshapeKernel;
} // namespace kernels

/** Basic function to compute the convolution layer. This function calls the following kernels/functions:
 *
 * -# @ref cpu::kernels::CpuIm2ColKernel
 * -# @ref CpuGemm (if the data type is BFLOAT16/FP16/FP32)
 * -# @ref CpuGemmLowpMatrixMultiplyCore (if the data type is QASYMM8/QASYMM8_SIGNED)
 * -# @ref CpuGemmLowpOutputStage (if the data type is QASYMM8/QASYMM8_SIGNED)
 * -# @ref cpu::kernels::CpuCol2ImKernel (if NCHW data layout)
 * -# @ref kernels::CpuWeightsReshapeKernel
 *
 */
class CpuGemmConv2d : public ICpuOperator
{
public:
    /** Constructor */
    CpuGemmConv2d();
    /** Prevent instances of this class from being copied (as this class contains pointers) */
    CpuGemmConv2d(const CpuGemmConv2d &) = delete;
    /** Prevent instances of this class from being moved (as this class contains non-movable objects) */
    CpuGemmConv2d(CpuGemmConv2d &&) = delete;
    /** Prevent instances of this class from being copied (as this class contains pointers) */
    CpuGemmConv2d &operator=(const CpuGemmConv2d &) = delete;
    /** Prevent instances of this class from being moved (as this class contains non-movable objects) */
    CpuGemmConv2d &operator=(CpuGemmConv2d &&) = delete;
    /** Destructor */
    ~CpuGemmConv2d();
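
    /* A minimal usage sketch, assuming hypothetical ITensor objects (src, weights,
     * biases, dst) whose infos match the signatures below; the handling of the
     * auxiliary workspace tensors reported by workspace() is omitted for brevity:
     *
     *   CpuGemmConv2d conv;
     *   conv.configure(src.info(), weights.info(), biases.info(), dst.info(), PadStrideInfo(1, 1, 0, 0));
     *
     *   ITensorPack pack{ { TensorType::ACL_SRC_0, &src }, { TensorType::ACL_SRC_1, &weights },
     *                     { TensorType::ACL_SRC_2, &biases }, { TensorType::ACL_DST, &dst } };
     *   conv.prepare(pack); // one-off work, e.g. weights reshaping
     *   conv.run(pack);
     */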
    /** Set the input and output tensors.
     *
     * Valid data layouts:
     * - NHWC
     * - NCHW
     *
     * Valid data type configurations:
     * |src0           |src1               |src2     |dst            |
     * |:--------------|:------------------|:--------|:--------------|
     * |F16            |F16                |F16      |F16            |
     * |F32            |F32                |F32      |F32            |
     * |BFLOAT16       |BFLOAT16           |BFLOAT16 |BFLOAT16       |
     * |QASYMM8        |QASYMM8            |S32      |QASYMM8        |
     * |QASYMM8        |QSYMM8_PER_CHANNEL |S32      |QASYMM8        |
     * |QASYMM8_SIGNED |QASYMM8_SIGNED     |S32      |QASYMM8_SIGNED |
     * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32      |QASYMM8_SIGNED |
     *
     * @param[in]  src              Source tensor info. 3 lower dimensions represent a single input [width, height, IFM],
     *                              while every optional dimension from 4 and above represents a batch of inputs.
     *                              Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
     * @param[in]  weights          Weights tensor info. Weights are a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
     *                              Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
     * @param[in]  biases           Biases tensor info. Shared biases supported. Biases are a 1D tensor with dimensions [OFM].
     *                              Data type supported: Should match @p src data type, except for src of QASYMM8/QASYMM8_SIGNED type, where biases should be of S32 type.
     * @param[out] dst              Destination tensor info. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent a batch of outputs.
     *                              Data types supported: Same as @p src.
     * @param[in]  conv_info        Contains padding and stride information described in @ref PadStrideInfo.
     * @param[in]  weights_info     Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer, the weights
     *                              tensor has also been transposed with cpu::kernels::CpuGemmTranspose1xWKernel. Data type supported: Same as @p src.
     * @param[in]  dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     * @param[in]  act_info         (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
     * @param[in]  enable_fast_math (Optional) Enable fast math computation. If this flag is set, the function may dispatch the fastest implementation
     *                              available, which may introduce a drop in accuracy. Defaults to false.
     * @param[in]  num_groups       (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported.
     */
    void configure(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *dst, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo(),
                   const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false, unsigned int num_groups = 1);
    /** Static function to check if given info will lead to a valid configuration
     *
     * Similar to CpuGemmConv2d::configure()
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                           const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(),
                           bool enable_fast_math = false, unsigned int num_groups = 1);
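
    /* A minimal validate-before-configure sketch (hypothetical tensor infos; a
     * typical caller falls back to a generic path when validation fails):
     *
     *   const Status st = CpuGemmConv2d::validate(&src_info, &weights_info, &biases_info, &dst_info, conv_info);
     *   if(st.error_code() != ErrorCode::OK)
     *   {
     *       // e.g. fall back to a reference convolution implementation
     *   }
     */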

    /** Indicates whether or not there is an optimal assembly implementation that can be used to process the given parameters.
     *
     * The parameter list is the same as @ref NEGEMMConvolutionLayer::has_opt_impl
     *
     * @return a status.
     */
    static Status has_opt_impl(arm_compute::WeightFormat &expected_weight_format, const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
                               const PadStrideInfo &conv_info,
                               const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(),
                               const bool enable_fast_math = false);
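
    /* A hedged query sketch: probe for a fixed-format assembly kernel and recover
     * the weight layout it expects (names are illustrative; on success the caller
     * would reorder the weights tensor into @p expected_wf before running):
     *
     *   arm_compute::WeightFormat expected_wf = arm_compute::WeightFormat::UNSPECIFIED;
     *   const Status st = CpuGemmConv2d::has_opt_impl(expected_wf, &src_info, &weights_info,
     *                                                 &biases_info, &dst_info, conv_info);
     */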

    // Inherited methods overridden:
    void run(ITensorPack &tensors) override;
    void prepare(ITensorPack &tensors) override;
    experimental::MemoryRequirements workspace() const override;

private:
    /** Configures the appropriate matrix multiply routine
     *
     * @param[in]  src              Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
     * @param[in]  weights          Weights tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
     * @param[in]  biases           Biases tensor info. Shared biases supported. Biases are a 1D tensor with dimensions [OFM].
     *                              Data type supported: Should match @p src data type, except for src of QASYMM8/QASYMM8_SIGNED type, where biases should be of S32 type.
     * @param[out] dst              Output tensor info. Data types supported: Same as @p src,
     *                              except for src of QASYMM8/QASYMM8_SIGNED type, where output should be of S32 type.
     * @param[in]  act_info         (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
     * @param[in]  enable_fast_math (Optional) Enable fast math computation. If this flag is set, the function may dispatch the fastest implementation
     *                              available, which may introduce a drop in accuracy. Defaults to false.
     * @param[in]  gemm_3d_depth    (Optional) Depth of GEMM 3D (Defaults to 1)
     * @param[in]  fixed_format     (Optional) Select GEMM execution with variable weights.
     * @param[in]  weight_format    (Optional) The layout to be used for the weights tensor when running GEMM with variable weights.
     */
    void configure_mm(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo(),
                      bool enable_fast_math = false, int gemm_3d_depth = 1, bool fixed_format = false, arm_compute::WeightFormat weight_format = arm_compute::WeightFormat::UNSPECIFIED);
    /** Static function to check if given info will lead to a valid configuration of the @ref NEGEMMConvolutionLayer matrix multiply routines
     *
     * @param[in] src              Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
     * @param[in] weights          Weights tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
     * @param[in] biases           Biases tensor info. Shared biases supported. Biases are a 1D tensor with dimensions [OFM].
     *                             Data type supported: Should match @p src data type, except for src of QASYMM8/QASYMM8_SIGNED type, where biases should be of S32 type.
     * @param[in] dst              Output tensor info. Data types supported: Same as @p src,
     *                             except for src of QASYMM8/QASYMM8_SIGNED type, where output should be of S32 type.
     * @param[in] act_info         (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
     * @param[in] enable_fast_math (Optional) Enable fast math computation. If this flag is set, the function may dispatch the fastest implementation
     *                             available, which may introduce a drop in accuracy. Defaults to false.
     * @param[in] gemm_3d_depth    (Optional) Depth of GEMM 3D (Defaults to 1)
     * @param[in] skip_im2col      (Optional) Flag which specifies if im2col has to be skipped, i.e. 1x1 convolution with NHWC data layout. (Defaults to false)
     * @param[in] fixed_format     (Optional) Select GEMM execution with variable weights.
     * @param[in] weight_format    (Optional) The layout to be used for the weights tensor when running GEMM with variable weights.
     *
     * @return a status
     */
    static Status validate_mm(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo(),
                              bool enable_fast_math = false, int gemm_3d_depth = 1, bool skip_im2col = false, bool fixed_format = false, arm_compute::WeightFormat weight_format = arm_compute::WeightFormat::UNSPECIFIED);
    /** Static function to check if GEMM3D is supported in @ref NEGEMM or in @ref CpuGemmLowpMatrixMultiplyCore
     *
     * @param[in] src           Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
     * @param[in] weights       Weights tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
     * @param[in] act_info      Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
     * @param[in] gemm_3d_depth Depth of GEMM 3D
     * @param[in] skip_im2col   Flag which specifies if im2col has to be skipped, i.e. 1x1 convolution with NHWC data layout
     *
     * @return a status
     */
    static Status validate_gemm3d(const ITensorInfo *src, const ITensorInfo *weights, const ActivationLayerInfo &act_info, int gemm_3d_depth, bool skip_im2col);

    struct SkipInfo
    {
        bool skip_im2col;
        bool skip_col2im;
    };

    /** Static function to provide skip_im2col and skip_col2im information.
     *
     * @param[in] src       Input tensor info.
     * @param[in] weights   Weights tensor info.
     * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
     * @param[in] dilation  Dilation, in elements, across x and y.
     * @param[in] act_info  Activation layer information in case of a fused activation.
     *
     * @return a SkipInfo instance.
     */
    static SkipInfo skip_im_col_info(const ITensorInfo *src, const ITensorInfo *weights, const PadStrideInfo &conv_info,
                                     const Size2D &dilation, const ActivationLayerInfo &act_info);
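
    /* A simplified sketch of the skip decision (an assumption: the implementation
     * also accounts for dilation, padding and data type; this mirrors only the
     * common case of a pointwise convolution):
     *
     *   const bool skip_im2col = (data_layout == DataLayout::NHWC)
     *                            && (kernel_x == 1 && kernel_y == 1)
     *                            && (conv_info.stride().first == 1 && conv_info.stride().second == 1);
     *   const bool skip_col2im = (data_layout == DataLayout::NHWC); // col2im is only needed for NCHW
     */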

    /** Indicates if the convolution executes in variable weights mode.
     *
     * Similar to @ref CpuGemm::isVarWeightsKernel
     */
    bool isVarWeightsKernel() const;
    enum AuxTensorIdx
    {
        // CpuGemmLowpMatrixMultiplyCore has up to 8 internal tensors
        Im2ColOutput = 9,
        WeightsReshaped,
        GemmOutput,
        Count
    };

    std::unique_ptr<kernels::CpuWeightsReshapeKernel> _weights_reshape_kernel;
    std::unique_ptr<cpu::kernels::CpuIm2ColKernel>    _im2col_kernel;
    std::unique_ptr<CpuGemm>                          _mm_gemm;
    std::unique_ptr<CpuGemmLowpMatrixMultiplyCore>    _mm_gemmlowp;
    std::unique_ptr<kernels::CpuCol2ImKernel>         _col2im_kernel;
    std::unique_ptr<kernels::CpuReshapeKernel>        _reshape_kernel;

    TensorInfo _im2col_output;
    TensorInfo _weights_reshaped;
    TensorInfo _gemm_output;
    TensorInfo _gemm_output_3d;

    DataLayout _data_layout;

    bool _skip_im2col;
    bool _skip_col2im;
    bool _is_quantized;
    bool _is_prepared;

    experimental::MemoryRequirements _aux_mem{ Count };
};
} // namespace cpu
} // namespace arm_compute
#endif /* ARM_COMPUTE_CPU_GEMM_CONV2D_H */