/*
 * Copyright (c) 2017-2020 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H
#define ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H

#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEPermute.h"
#include "arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h"
#include <memory>

namespace arm_compute
{
// Forward declarations
class ITensor;
class NEDepthwiseConvolutionLayerNativeKernel;

/** Function to execute a depthwise convolution.
 *
 * At configure time this function selects one of two internal paths
 * (see @ref get_depthwiseconvolution_function):
 * -# @ref NEDepthwiseConvolutionLayerOptimizedInternal — assembly-dispatch based path
 * -# @ref NEDepthwiseConvolutionLayerGeneric           — generic native-kernel path
 */
class NEDepthwiseConvolutionLayer : public IFunction
{
public:
    /** Default constructor */
    NEDepthwiseConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEDepthwiseConvolutionLayer(const NEDepthwiseConvolutionLayer &) = delete;
    /** Default move constructor */
    NEDepthwiseConvolutionLayer(NEDepthwiseConvolutionLayer &&) = default;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEDepthwiseConvolutionLayer &operator=(const NEDepthwiseConvolutionLayer &) = delete;
    /** Default move assignment operator */
    NEDepthwiseConvolutionLayer &operator=(NEDepthwiseConvolutionLayer &&) = default;
    /** Default destructor */
    ~NEDepthwiseConvolutionLayer();
    /** Initialize the function's source, destination, weights and convolution information.
     *
     * @note Parameters are documented in declaration order.
     *
     * @param[in, out] input            Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32
     * @param[in]      weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
     *                                  Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
     * @param[in]      biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
     *                                  Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
     * @param[out]     output           Destination tensor. Data type supported: same as @p input.
     * @param[in]      conv_info        Padding and stride information to use for the convolution.
     * @param[in]      depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
     * @param[in]      act_info         (Optional) Activation layer information in case of a fused activation.
     * @param[in]      dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     */
    void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
                   unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

    /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayer
     *
     * @param[in] input            Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32
     * @param[in] weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
     *                             Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
     * @param[in] biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
     *                             Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
     * @param[in] output           Destination tensor. Data type supported: same as @p input.
     * @param[in] conv_info        Padding and stride information to use for the convolution.
     * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
     * @param[in] act_info         (Optional) Activation layer information in case of a fused activation.
     * @param[in] dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                           unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

    // Inherited methods overridden:
    void run() override;
    void prepare() override;

private:
    /** Static function to choose the best depthwise convolution function for @ref NEDepthwiseConvolutionLayer
     *
     * @param[in] input            Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32
     * @param[in] weights          Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
     *                             Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
     * @param[in] biases           Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
     *                             Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
     * @param[in] output           Destination tensor. Data type supported: same as @p input.
     * @param[in] conv_info        Padding and stride information to use for the convolution.
     * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
     * @param[in] act_info         (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 quantized are supported.
     * @param[in] dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     *
     * @return a Depthwise Convolution Function
     */
    static DepthwiseConvolutionFunction get_depthwiseconvolution_function(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
                                                                          const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
                                                                          ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

    /** Basic function to execute optimized depthwise convolution routines. This function calls the following NEON kernels:
     *
     * @note At the moment 3x3 and 5x5 convolution of stride 1, 2 are supported
     *
     * -# @ref NEFillBorderKernel (if pad_x or pad_y > 0) and no assembly kernel implementation is present
     * -# @ref NEDepthwiseConvolutionLayer3x3Kernel if 3x3 and no assembly kernel implementation is present
     * -# @ref NEDepthwiseConvolutionAssemblyDispatch if assembly kernel implementation is present
     * -# @ref NEDirectConvolutionLayerOutputStageKernel if re-quantization of output is required
     * -# @ref NEActivationLayer if fused activation is required
     *
     */
    class NEDepthwiseConvolutionLayerOptimizedInternal : public IFunction
    {
    public:
        /** Default constructor */
        NEDepthwiseConvolutionLayerOptimizedInternal(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
        /** Prevent instances of this class from being copied (As this class contains pointers) */
        NEDepthwiseConvolutionLayerOptimizedInternal(const NEDepthwiseConvolutionLayerOptimizedInternal &) = delete;
        /** Default move constructor */
        NEDepthwiseConvolutionLayerOptimizedInternal(NEDepthwiseConvolutionLayerOptimizedInternal &&) = default;
        /** Prevent instances of this class from being copied (As this class contains pointers) */
        NEDepthwiseConvolutionLayerOptimizedInternal &operator=(const NEDepthwiseConvolutionLayerOptimizedInternal &) = delete;
        /** Default move assignment operator */
        NEDepthwiseConvolutionLayerOptimizedInternal &operator=(NEDepthwiseConvolutionLayerOptimizedInternal &&) = default;
        /** Default destructor */
        ~NEDepthwiseConvolutionLayerOptimizedInternal() = default;
        /** Initialize the function's source, destination, kernels and border_size.
         *
         * @param[in, out] input            Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling).
         * @param[in]      weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
         * @param[in]      biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
         *                                  Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
         * @param[out]     output           Destination tensor. Data type supported: same as @p input.
         * @param[in]      conv_info        Padding and stride information to use for the convolution.
         * @param[in]      depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
         * @param[in]      act_info         (Optional) Activation layer information in case of a fused activation.
         * @param[in]      dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
         */
        void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
                       unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

        /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayerOptimizedInternal
         *
         * @param[in] input            Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling).
         * @param[in] weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
         * @param[in] biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
         *                             Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
         * @param[in] output           Destination tensor. Data type supported: same as @p input.
         * @param[in] conv_info        Padding and stride information to use for the convolution.
         * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
         * @param[in] act_info         (Optional) Activation layer information in case of a fused activation.
         * @param[in] dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
         *
         * @return a status
         */
        static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                               unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

        // Inherited methods overridden:
        void run() override;
        void prepare() override;

    private:
        MemoryGroup                            _memory_group;
        NEDepthwiseConvolutionAssemblyDispatch _dwc_optimized_func;
        NEPermute                              _permute_input;
        NEPermute                              _permute_weights;
        NEPermute                              _permute_output;
        NEActivationLayer                      _activationlayer_function;
        Tensor                                 _accumulator;
        Tensor                                 _permuted_input;
        Tensor                                 _permuted_weights;
        Tensor                                 _permuted_output;
        const ITensor                         *_original_weights; // non-owning; kept so prepare() can access the user-provided weights
        bool                                   _has_bias;
        bool                                   _is_quantized;
        bool                                   _is_nchw;
        bool                                   _permute;
        bool                                   _is_activationlayer_enabled;
        bool                                   _is_prepared;
    };

    /** Basic function to execute a generic depthwise convolution. This function calls the following NEON kernel:
     *
     * -# @ref NEDepthwiseConvolutionLayerNativeKernel
     *
     */
    class NEDepthwiseConvolutionLayerGeneric : public IFunction
    {
    public:
        /** Default constructor */
        NEDepthwiseConvolutionLayerGeneric();
        /** Prevent instances of this class from being copied (As this class contains pointers) */
        NEDepthwiseConvolutionLayerGeneric(const NEDepthwiseConvolutionLayerGeneric &) = delete;
        /** Default move constructor */
        NEDepthwiseConvolutionLayerGeneric(NEDepthwiseConvolutionLayerGeneric &&) = default;
        /** Prevent instances of this class from being copied (As this class contains pointers) */
        NEDepthwiseConvolutionLayerGeneric &operator=(const NEDepthwiseConvolutionLayerGeneric &) = delete;
        /** Default move assignment operator */
        NEDepthwiseConvolutionLayerGeneric &operator=(NEDepthwiseConvolutionLayerGeneric &&) = default;
        /** Default destructor */
        ~NEDepthwiseConvolutionLayerGeneric() = default;
        /** Initialize the function's source, destination, weights and convolution information.
         *
         * @param[in, out] input            Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling).
         * @param[in]      weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
         *                                  Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
         * @param[in]      biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
         *                                  Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
         * @param[out]     output           Destination tensor. Data type supported: same as @p input.
         * @param[in]      conv_info        Padding and stride information to use for the convolution.
         * @param[in]      depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
         * @param[in]      act_info         (Optional) Activation layer information in case of a fused activation.
         * @param[in]      dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
         */
        void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
                       unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

        /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayerGeneric
         *
         * @param[in] input            Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling).
         * @param[in] weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
         *                             Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
         * @param[in] biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
         *                             Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
         * @param[in] output           Destination tensor. Data type supported: same as @p input.
         * @param[in] conv_info        Padding and stride information to use for the convolution.
         * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
         * @param[in] act_info         (Optional) Activation layer information in case of a fused activation.
         * @param[in] dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
         *
         * @return a status
         */
        static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                               unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

        // Inherited methods overridden:
        void run() override;
        void prepare() override;

    private:
        std::unique_ptr<NEDepthwiseConvolutionLayerNativeKernel> _depthwise_conv_kernel;
        NEPermute                                                _permute_input;
        NEPermute                                                _permute_weights;
        NEPermute                                                _permute_output;
        NEActivationLayer                                        _activationlayer_function;
        Tensor                                                   _permuted_input;
        Tensor                                                   _permuted_weights;
        Tensor                                                   _permuted_output;
        bool                                                     _is_prepared;
        bool                                                     _is_nchw;
        bool                                                     _is_activationlayer_enabled;
        const ITensor                                           *_original_weights; // non-owning; kept so prepare() can access the user-provided weights
    };

    DepthwiseConvolutionFunction                 _depth_conv_func; // which of the two internal functions was selected at configure time
    NEDepthwiseConvolutionLayerOptimizedInternal _func_optimized;
    NEDepthwiseConvolutionLayerGeneric           _func_generic;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H */