/*
 * Copyright (c) 2017-2020 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef ARM_COMPUTE_NEGEMMCONVOLUTIONLAYER_H
#define ARM_COMPUTE_NEGEMMCONVOLUTIONLAYER_H

#include "arm_compute/runtime/IFunction.h"

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IWeightsManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/NEON/functions/NEGEMM.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h"
#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h"
#include "arm_compute/runtime/Tensor.h"

#include <memory>

namespace arm_compute
{
class ITensor;
class NECol2ImKernel;
class NEIm2ColKernel;
class NEWeightsReshapeKernel;

/** Function to reshape the weights. This function calls the following kernel:
 * -# @ref NEWeightsReshapeKernel
 */
class NEConvolutionLayerReshapeWeights : public IFunction
{
public:
    /** Constructor */
    NEConvolutionLayerReshapeWeights();
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEConvolutionLayerReshapeWeights(const NEConvolutionLayerReshapeWeights &) = delete;
    /** Prevent instances of this class from being moved (As this class contains non movable objects) */
    NEConvolutionLayerReshapeWeights(NEConvolutionLayerReshapeWeights &&) = delete;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEConvolutionLayerReshapeWeights &operator=(const NEConvolutionLayerReshapeWeights &) = delete;
    /** Prevent instances of this class from being moved (As this class contains non movable objects) */
    NEConvolutionLayerReshapeWeights &operator=(NEConvolutionLayerReshapeWeights &&) = delete;
    /** Default destructor */
    ~NEConvolutionLayerReshapeWeights();
    /** Set the input and output tensors.
     *
     * @param[in]  weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
     *                     Data type supported: All.
     * @param[in]  biases  Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
     *                     Data type supported: same as @p weights.
     *                     @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types.
     * @param[out] output  Destination tensor. Data types supported: same as @p weights.
     */
    void configure(const ITensor *weights, const ITensor *biases, ITensor *output);
    /** Static function to check if given info will lead to a valid configuration of @ref NEConvolutionLayerReshapeWeights
     *
     * @param[in] weights Weights tensor info. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
     *                    Data type supported: All.
     * @param[in] biases  Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
     *                    Data type supported: same as @p weights.
     *                    @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types.
     * @param[in] output  Destination tensor info. Data types supported: same as @p weights.
     *
     * @return an error status
     */
    static Status validate(const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output);

    // Inherited methods overridden:
    void run() override;

private:
    std::unique_ptr<NEWeightsReshapeKernel> _weights_reshape_kernel;
};
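
/* A minimal usage sketch of NEConvolutionLayerReshapeWeights, not part of the library interface.
 * Tensor names and shapes below are illustrative assumptions; the reshaped output shape is derived
 * from the weights during configure().
 *
 *   Tensor weights, biases, reshaped;
 *   weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 32U, 64U), 1, DataType::F32)); // [kernel_x, kernel_y, IFM, OFM]
 *   biases.allocator()->init(TensorInfo(TensorShape(64U), 1, DataType::F32));               // [OFM]
 *
 *   NEConvolutionLayerReshapeWeights reshape_weights;
 *   reshape_weights.configure(&weights, &biases, &reshaped);
 *
 *   weights.allocator()->allocate();
 *   biases.allocator()->allocate();
 *   reshaped.allocator()->allocate();
 *   // ... fill weights/biases ...
 *   reshape_weights.run();
 */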

namespace weights_transformations
{
/** Basic function to manage the reshape weights generated from @ref NEConvolutionLayerReshapeWeights */
class NEConvolutionLayerReshapeWeightsTransform : public ITransformWeights
{
public:
    /** Constructor */
    NEConvolutionLayerReshapeWeightsTransform() = default;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEConvolutionLayerReshapeWeightsTransform(const NEConvolutionLayerReshapeWeightsTransform &) = delete;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEConvolutionLayerReshapeWeightsTransform &operator=(const NEConvolutionLayerReshapeWeightsTransform &) = delete;
    /** Prevent instances of this class from being moved (As this class contains non movable objects) */
    NEConvolutionLayerReshapeWeightsTransform(NEConvolutionLayerReshapeWeightsTransform &&) = delete;
    /** Prevent instances of this class from being moved (As this class contains non movable objects) */
    NEConvolutionLayerReshapeWeightsTransform &operator=(NEConvolutionLayerReshapeWeightsTransform &&) = delete;
    /** Default destructor */
    ~NEConvolutionLayerReshapeWeightsTransform() = default;

    void configure(const ITensor *input, const ITensor *biases)
    {
        _bias_bit = (biases != nullptr) ? 1 : 0;
        _func.configure(input, biases, &_output);
    }

    void run() override
    {
        _output.allocator()->allocate();
        _func.run();
        _reshape_run = true;
    }

    ITensor *get_weights() override
    {
        return &_output;
    }

    void release() override
    {
        _output.allocator()->free();
    }

    uint32_t uid() override
    {
        return ((0x8) | (_bias_bit << 7));
    }

    bool is_reshape_run()
    {
        return _reshape_run;
    }

private:
    Tensor                           _output{};
    NEConvolutionLayerReshapeWeights _func{};
    int32_t                          _bias_bit{ 0 };
};
} // namespace weights_transformations
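
/* A minimal sketch of driving the managed reshape transform above directly, assuming the
 * illustrative `weights` and `biases` tensors from the previous sketch; typically the object
 * is registered with an IWeightsManager, which calls these methods on the caller's behalf.
 *
 *   weights_transformations::NEConvolutionLayerReshapeWeightsTransform transform;
 *   transform.configure(&weights, &biases);
 *   transform.run();                              // allocates and fills the reshaped weights
 *   ITensor *reshaped = transform.get_weights();  // valid until release() is called
 *   bool     done     = transform.is_reshape_run();
 *   transform.release();                          // frees the internal output tensor
 */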

/** Basic function to compute the convolution layer. This function calls the following NEON kernels/functions:
 *
 * -# @ref NEIm2ColKernel
 * -# @ref NEGEMM (if the data type is BFLOAT16/FP16/FP32)
 * -# @ref NEGEMMLowpMatrixMultiplyCore (if the data type is QASYMM8/QASYMM8_SIGNED)
 * -# @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint (if the data type is QASYMM8/QASYMM8_SIGNED)
 * -# @ref NEArithmeticAdditionKernel (if biases != nullptr and we have a 1x1 convolution with the NHWC data layout)
 * -# @ref NECol2ImKernel (if NCHW data layout)
 *
 */
class NEGEMMConvolutionLayer : public IFunction
{
public:
    /** Constructor */
    NEGEMMConvolutionLayer(const std::shared_ptr<IMemoryManager> &memory_manager = nullptr, IWeightsManager *weights_manager = nullptr);
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEGEMMConvolutionLayer(const NEGEMMConvolutionLayer &) = delete;
    /** Prevent instances of this class from being moved (As this class contains non movable objects) */
    NEGEMMConvolutionLayer(NEGEMMConvolutionLayer &&) = delete;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEGEMMConvolutionLayer &operator=(const NEGEMMConvolutionLayer &) = delete;
    /** Prevent instances of this class from being moved (As this class contains non movable objects) */
    NEGEMMConvolutionLayer &operator=(NEGEMMConvolutionLayer &&) = delete;
    /** Default destructor */
    ~NEGEMMConvolutionLayer();
    /** Set the input and output tensors.
     *
     * @param[in]  input        Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
     *                          while every optional dimension from 4 and above represent a batch of inputs.
     *                          Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
     * @param[in]  weights      Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
     *                          Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
     * @param[in]  biases       Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
     *                          Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
     * @param[out] output       Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
     *                          Data types supported: Same as @p input.
     * @param[in]  conv_info    Contains padding and stride information described in @ref PadStrideInfo.
     * @param[in]  weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights
     *                          tensor has also been transposed with NEGEMMTranspose1xWKernel. Data type supported: Same as @p input.
     * @param[in]  dilation     (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     * @param[in]  act_info     (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
     * @param[in]  num_groups   (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported
     */
    void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo(),
                   const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), unsigned int num_groups = 1);
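
    /* A minimal configuration sketch, not part of the class interface. The shapes, the 3x3 kernel
     * and the stride/padding values are illustrative assumptions; any combination accepted by
     * validate() below is configured the same way.
     *
     *   Tensor src, weights, biases, dst;
     *   src.allocator()->init(TensorInfo(TensorShape(224U, 224U, 3U), 1, DataType::F32));      // [width, height, IFM]
     *   weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 3U, 16U), 1, DataType::F32)); // [kernel_x, kernel_y, IFM, OFM]
     *   biases.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));              // [OFM]
     *
     *   NEGEMMConvolutionLayer conv;
     *   conv.configure(&src, &weights, &biases, &dst, PadStrideInfo(1, 1, 1, 1)); // stride 1x1, padding 1x1
     *
     *   src.allocator()->allocate();
     *   weights.allocator()->allocate();
     *   biases.allocator()->allocate();
     *   dst.allocator()->allocate();
     *   // ... fill src/weights/biases ...
     *   conv.run();
     */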
    /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMConvolutionLayer
     *
     * @param[in] input        Source tensor info. 3 lower dimensions represent a single input [width, height, IFM],
     *                         while every optional dimension from 4 and above represent a batch of inputs.
     *                         Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
     * @param[in] weights      Weights tensor info. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
     *                         Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
     * @param[in] biases       Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
     *                         Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
     * @param[in] output       Destination tensor info. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
     *                         Data types supported: Same as @p input.
     * @param[in] conv_info    Contains padding and stride information described in @ref PadStrideInfo.
     * @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights
     *                         tensor has also been transposed with NEGEMMTranspose1xWKernel. Data type supported: Same as @p input.
     * @param[in] dilation     (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     * @param[in] act_info     (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
     * @param[in] num_groups   (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                           const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), unsigned int num_groups = 1);
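
    /* A minimal validation sketch, assuming the illustrative shapes from the configuration sketch
     * above (224x224 input, 3x3 kernel, stride 1, padding 1 gives a 224x224x16 output). Calling
     * validate() first is the usual way to reject an unsupported combination before allocating anything.
     *
     *   const TensorInfo src_info(TensorShape(224U, 224U, 3U), 1, DataType::F32);
     *   const TensorInfo wei_info(TensorShape(3U, 3U, 3U, 16U), 1, DataType::F32);
     *   const TensorInfo bia_info(TensorShape(16U), 1, DataType::F32);
     *   const TensorInfo dst_info(TensorShape(224U, 224U, 16U), 1, DataType::F32);
     *
     *   const Status status = NEGEMMConvolutionLayer::validate(&src_info, &wei_info, &bia_info, &dst_info, PadStrideInfo(1, 1, 1, 1));
     *   ARM_COMPUTE_ERROR_THROW_ON(status); // reports the error if the configuration is not supported
     */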

    // Inherited methods overridden:
    void run() override;
    void prepare() override;

private:
    /** Configures the appropriate matrix multiply routine
     *
     * @param[in]  input         Input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
     * @param[in]  weights       Weights tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
     * @param[in]  biases        Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
     *                           Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
     * @param[out] output        Output tensor. Data types supported: Same as @p input,
     *                           except for input of QASYMM8/QASYMM8_SIGNED type where output should be of S32 type.
     * @param[in]  act_info      (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
     * @param[in]  gemm_3d_depth (Optional) Depth of GEMM 3D (Defaults to 1)
     */
    void configure_mm(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo(), int gemm_3d_depth = 1);
    /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMConvolutionLayer matrix multiply routines
     *
     * @param[in] input         Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
     * @param[in] weights       Weights tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
     * @param[in] biases        Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
     *                          Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
     * @param[in] output        Output tensor info. Data types supported: Same as @p input,
     *                          except for input of QASYMM8/QASYMM8_SIGNED type where output should be of S32 type.
     * @param[in] act_info      (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
     * @param[in] gemm_3d_depth (Optional) Depth of GEMM 3D (Defaults to 1)
     * @param[in] skip_im2col   (Optional) Flag which specifies if im2col has to be skipped. i.e. 1x1 convolution with NHWC data layout. (Default to false)
     *
     * @return a status
     */
    static Status validate_mm(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo(),
                              int gemm_3d_depth = 1, bool skip_im2col = false);
    /** Static function to check if GEMM3D is supported in @ref NEGEMM or in @ref NEGEMMLowpMatrixMultiplyCore
     *
     * @param[in] input_info    Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
     * @param[in] weights_info  Weights tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
     * @param[in] act_info      Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
     * @param[in] gemm_3d_depth Depth of GEMM 3D
     * @param[in] skip_im2col   Flag which specifies if im2col has to be skipped. i.e. 1x1 convolution with NHWC data layout
     *
     * @return a status
     */
    static Status validate_gemm3d(const ITensorInfo *input_info, const ITensorInfo *weights_info, const ActivationLayerInfo &act_info, int gemm_3d_depth, bool skip_im2col);

private:
    MemoryGroup                                                         _memory_group;
    IWeightsManager                                                    *_weights_manager;
    NEConvolutionLayerReshapeWeights                                    _reshape_weights;
    weights_transformations::NEConvolutionLayerReshapeWeightsTransform _reshape_weights_managed;
    std::unique_ptr<NEIm2ColKernel>                                     _im2col_kernel;
    NEGEMM                                                              _mm_gemm;
    NEGEMMLowpMatrixMultiplyCore                                        _mm_gemmlowp;
    std::unique_ptr<NECol2ImKernel>                                     _col2im_kernel;
    NEReshapeLayer                                                      _reshape_layer;

    const ITensor *_original_weights;

    Tensor _im2col_output;
    Tensor _weights_reshaped;
    Tensor _gemm_output;
    Tensor _tmp_output;

    DataLayout _data_layout;

    bool _skip_im2col;
    bool _skip_col2im;
    bool _is_quantized;
    bool _is_prepared;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NEGEMMCONVOLUTIONLAYER_H */