1 /* 2 * Copyright (c) 2019-2020 Arm Limited. 3 * 4 * SPDX-License-Identifier: MIT 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to 8 * deal in the Software without restriction, including without limitation the 9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 10 * sell copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in all 14 * copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 */ 24 #ifndef ARM_COMPUTE_CORE_KERNEL_DESCRIPTORS_H 25 #define ARM_COMPUTE_CORE_KERNEL_DESCRIPTORS_H 26 27 #include "arm_compute/core/PixelValue.h" 28 #include "arm_compute/core/Types.h" 29 30 namespace arm_compute 31 { 32 /** Descriptor for FFT scale kernels */ 33 struct FFTScaleKernelInfo 34 { 35 float scale{ 0.f }; /**< Axis to perform the kernel on. */ 36 bool conjugate{ true }; /**< Flag to conjugate the output/ */ 37 }; 38 39 /** Descriptor for FFT digit reverse kernels */ 40 struct FFTDigitReverseKernelInfo 41 { 42 unsigned int axis{ 0 }; /**< Axis to perform the kernel on. */ 43 bool conjugate{ false }; /**< Flag to conjugate the output/ */ 44 }; 45 46 /** Descriptor used by the FFT core kernels */ 47 struct FFTRadixStageKernelInfo 48 { 49 unsigned int axis{ 0 }; /**< Axis to run the kernel on. */ 50 unsigned int radix{ 0 }; /**< Radix to use. */ 51 unsigned int Nx{ 0 }; /**< Nx coefficient. */ 52 bool is_first_stage{ false }; /**< Flags if the FFT kernels is the first stage of a decomposed FFT. */ 53 }; 54 55 /** Descriptor used by the GEMM kernels */ 56 struct GEMMKernelInfo 57 { 58 GEMMKernelInfo() = default; GEMMKernelInfoGEMMKernelInfo59 GEMMKernelInfo( 60 unsigned int im, 61 unsigned int in, 62 unsigned int ik, 63 unsigned int idepth_output_gemm3d, 64 bool ireinterpret_input_as_3d, 65 bool ibroadcast_bias, 66 bool ifp_mixed_precision, 67 bool ihas_pad_y, 68 ActivationLayerInfo iactivation_info, 69 int inmult_transpose1xW_width, 70 int imult_interleave4x4_height, 71 GEMMLHSMatrixInfo ilhs_info, 72 GEMMRHSMatrixInfo irhs_info, 73 int32_t ina_offset, 74 int32_t inb_offset) 75 : m(im), n(in), k(ik), depth_output_gemm3d(idepth_output_gemm3d), reinterpret_input_as_3d(ireinterpret_input_as_3d), broadcast_bias(ibroadcast_bias), fp_mixed_precision(ifp_mixed_precision), 76 has_pad_y(ihas_pad_y), activation_info(iactivation_info), mult_transpose1xW_width(inmult_transpose1xW_width), mult_interleave4x4_height(imult_interleave4x4_height), lhs_info(ilhs_info), rhs_info(irhs_info), 77 a_offset(ina_offset), b_offset(inb_offset) 78 { 79 } 80 81 unsigned int m{ 0 }; /**< Number of LHS rows*/ 82 unsigned int n{ 0 }; /**< Number of RHS columns*/ 83 unsigned int k{ 0 }; /**< Number of LHS columns or RHS rows */ 84 unsigned int depth_output_gemm3d{ 0 }; /**< Depth of the output tensor in case is reinterpreted as 3D */ 85 bool reinterpret_input_as_3d{ false }; /**< Flag used to reinterpret the input as 3D */ 86 bool broadcast_bias{ false }; /**< Flag used to broadcast the bias addition */ 87 bool fp_mixed_precision{ false }; /**< Flag used to indicate wider accumulators (32 bit instead of 16 for FP16). */ 88 bool has_pad_y{ false }; /**< Flag used to indicate if the input/output tensors have internal pad on the y direction */ 89 ActivationLayerInfo activation_info{}; /**< Activation function to perform after the matrix multiplication */ 90 int mult_transpose1xW_width{ 1 }; /**< Multiplication factor for the width of the 1xW transposed block */ 91 int mult_interleave4x4_height{ 1 }; /**< Multiplication factor for the height of the 4x4 interleaved block */ 92 GEMMLHSMatrixInfo lhs_info{}; /**< LHS matrix information used to retrieve the number of rows processed by each thread */ 93 GEMMRHSMatrixInfo rhs_info{}; /**< RHS matrix information used for reshaping the RHS matrix */ 94 int32_t a_offset{ 0 }; /**< Offset to be added to each element of the matrix A */ 95 int32_t b_offset{ 0 }; /**< Offset to be added to each element of the matrix B */ 96 GEMMLowpOutputStageInfo output_stage{}; /**< GEMMLowp output stage information */ 97 }; 98 99 /** Descriptor used by the depthwise convolution kernels */ 100 struct DWCKernelInfo 101 { 102 ActivationLayerInfo activation_info{}; /**< Activation function to perform after the depthwise convolution */ 103 }; 104 105 /** Descriptor used by the depthwise convolution kernels to retrieve the number of output elements processed by each thread */ 106 struct DWCWeightsKernelInfo 107 { 108 unsigned int n0{ 0 }; /**< Number of columns processed by each thread */ 109 }; 110 111 /** Descriptor used by the softmax kernels */ 112 struct SoftmaxKernelInfo 113 { 114 float beta{ 1.f }; /**< A scaling factor for the exponent with default value 1.0 */ 115 bool is_log{ false }; /**< Flag used to perform Log Softmax operation */ 116 DataType input_data_type{ DataType::UNKNOWN }; /**< Input tensor data type */ 117 }; 118 119 /** Descriptor used by the direct convolution layer output stage kernels */ 120 struct DirectConvolutionLayerOutputStageKernelInfo 121 { 122 int32_t result_fixedpoint_multiplier{ 0 }; /**< Result output stage multiplier used for quantizing */ 123 int32_t result_shift{ 0 }; /**< Result output stage shift used for quantizing */ 124 int32_t result_offset_after_shift{ 0 }; /**< Result offset used for quantizing */ 125 DataType output_data_type{ DataType::UNKNOWN }; /**< Output tensor data type to use if the output is not initialized */ 126 }; 127 128 struct InstanceNormalizationLayerKernelInfo 129 { 130 /** Default constructor */ InstanceNormalizationLayerKernelInfoInstanceNormalizationLayerKernelInfo131 InstanceNormalizationLayerKernelInfo() 132 : InstanceNormalizationLayerKernelInfo(1.f, 0.f, 1e-12, true) 133 { 134 } 135 /** Constructor 136 * 137 * @param[in] gamma The scale scalar value applied to the normalized tensor. 138 * @param[in] beta The offset scalar value applied to the normalized tensor 139 * @param[in] epsilon Lower bound value for the normalization. 140 * @param[in] use_mixed_precision Use mixed precision in case of FP16 execution. 141 */ InstanceNormalizationLayerKernelInfoInstanceNormalizationLayerKernelInfo142 InstanceNormalizationLayerKernelInfo(float gamma, float beta, float epsilon, bool use_mixed_precision) 143 : gamma(gamma), beta(beta), epsilon(epsilon), use_mixed_precision(use_mixed_precision) 144 { 145 } 146 147 float gamma; /**< The scale scalar value applied to the normalized tensor. Defaults to 1.0 */ 148 float beta; /**< The offset scalar value applied to the normalized tensor. Defaults to 0.0 */ 149 float epsilon; /**< Lower bound value for the normalization. Defaults to 1e-12 */ 150 bool use_mixed_precision; /**< Use mixed precision in case of FP16 execution. Defaults to true */ 151 }; 152 153 struct GEMMLowpReductionKernelInfo 154 { 155 /** Default constructor */ 156 GEMMLowpReductionKernelInfo() = default; 157 /** Constructor 158 * 159 * @param[in] k Number of matrix columns/rows. 160 * @param[in] is_reshaped True if the input tensor has been reshaped. 161 * @param[in] scalar Scalar value to multiply each reduced column/row by. 162 * @param[in] mul_by_scalar True if each column/row reduction has to be multiplied by a scalar value. 163 */ GEMMLowpReductionKernelInfoGEMMLowpReductionKernelInfo164 GEMMLowpReductionKernelInfo(int32_t k, bool is_reshaped, int32_t scalar, bool mul_by_scalar) 165 : k(k), is_reshaped(is_reshaped), scalar(scalar), mul_by_scalar(mul_by_scalar) 166 { 167 } 168 169 int32_t k{ 0 }; /**< Number of matrix columns/rows */ 170 bool is_reshaped{ false }; /**< True if the input tensor has been reshaped */ 171 int32_t scalar{ 0 }; /**< Scalar value to multiply each reduced column/row by */ 172 bool mul_by_scalar{ false }; /**< True if each column/row reduction has to be multiplied by a scalar value */ 173 }; 174 175 struct ScaleKernelInfo 176 { 177 /** Constructor 178 * 179 * @param[in] interpolation_policy Interpolation type to use 180 * @param[in] border_mode Border mode policy 181 * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT and use_padding is set to false. Defaults to default @ref PixelValue 182 * @param[in] sampling_policy (Optional) Sampling policy used by the interpolation. Defaults to @ref SamplingPolicy::CENTER 183 * @param[in] use_padding (Optional) Is padding in use or not. Defaults to true. 184 * @param[in] align_corners (Optional) Align corners of input and output, only affecting bilinear policy with TOP_LEFT sampling policy. Defaults to false. 185 */ 186 ScaleKernelInfo(InterpolationPolicy interpolation_policy, 187 BorderMode border_mode, 188 PixelValue constant_border_value = PixelValue(), 189 SamplingPolicy sampling_policy = SamplingPolicy::CENTER, 190 bool use_padding = true, 191 bool align_corners = false) 192 : interpolation_policy{ interpolation_policy }, 193 border_mode{ border_mode }, 194 constant_border_value{ constant_border_value }, 195 sampling_policy{ sampling_policy }, 196 use_padding{ use_padding }, 197 align_corners{ align_corners } 198 { 199 } 200 201 InterpolationPolicy interpolation_policy; /**< Interpolation type to use */ 202 BorderMode border_mode; /**< Border mode policy */ 203 PixelValue constant_border_value; /**< Constant value to use for constant border mode policy */ 204 SamplingPolicy sampling_policy; /**< Sampling policy used by the interpolation. */ 205 bool use_padding; /**< Indication of using padding */ 206 bool align_corners; /**< Align corners of input and output */ 207 }; 208 209 struct ThresholdKernelInfo 210 { 211 /** Default constructor */ 212 ThresholdKernelInfo() = default; 213 /** Constructor 214 * 215 * @param[in] threshold Threshold. When the threshold type is RANGE, this is used as the lower threshold. 216 * @param[in] false_value value to set when the condition is not respected. 217 * @param[in] true_value value to set when the condition is respected. 218 * @param[in] type Thresholding type. Either RANGE or BINARY. 219 * @param[in] upper Upper threshold. Only used when the thresholding type is RANGE. 220 */ ThresholdKernelInfoThresholdKernelInfo221 ThresholdKernelInfo(uint8_t threshold, uint8_t false_value, uint8_t true_value, ThresholdType type, uint8_t upper) 222 : threshold(threshold), false_value(false_value), true_value(true_value), type(type), upper(upper) 223 { 224 } 225 226 uint8_t threshold{ 0 }; 227 uint8_t false_value{ 0 }; 228 uint8_t true_value{ 0 }; 229 ThresholdType type{ ThresholdType::BINARY }; 230 uint8_t upper{ 0 }; 231 }; 232 } // namespace arm_compute 233 #endif /* ARM_COMPUTE_CORE_KERNEL_DESCRIPTORS_H */ 234