1 /* 2 * Copyright (c) 2017-2020 Arm Limited. 3 * 4 * SPDX-License-Identifier: MIT 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to 8 * deal in the Software without restriction, including without limitation the 9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 10 * sell copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in all 14 * copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 */ 24 #ifndef ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H 25 #define ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H 26 27 #include "arm_compute/core/Types.h" 28 #include "arm_compute/runtime/CL/CLTensor.h" 29 #include "arm_compute/runtime/CL/functions/CLPermute.h" 30 #include "arm_compute/runtime/IFunction.h" 31 #include "arm_compute/runtime/MemoryGroup.h" 32 33 namespace arm_compute 34 { 35 class CLCompileContext; 36 class CLFillBorderKernel; 37 class CLDepthwiseConvolutionLayerNativeKernel; 38 class CLDepthwiseConvolutionLayerReshapeWeightsKernel; 39 class ICLDepthwiseConvolutionLayer3x3Kernel; 40 class ICLTensor; 41 42 /** Function to execute a depthwise convolution 43 */ 44 class CLDepthwiseConvolutionLayer : public IFunction 45 { 46 public: 47 /** Default constructor */ 48 CLDepthwiseConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr); 49 /** Prevent instances of this class from being copied (As this class contains pointers) */ 50 CLDepthwiseConvolutionLayer(const CLDepthwiseConvolutionLayer &) = delete; 51 /** Default move constructor */ 52 CLDepthwiseConvolutionLayer(CLDepthwiseConvolutionLayer &&) = default; 53 /** Prevent instances of this class from being copied (As this class contains pointers) */ 54 CLDepthwiseConvolutionLayer &operator=(const CLDepthwiseConvolutionLayer &) = delete; 55 /** Default move assignment operator */ 56 CLDepthwiseConvolutionLayer &operator=(CLDepthwiseConvolutionLayer &&) = default; 57 /** Default destructor */ 58 ~CLDepthwiseConvolutionLayer(); 59 /** Initialize the function's source, destination, weights and convolution information. 60 * 61 * @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/FP16/FP32. Data layout supported: NHWC, NCHW 62 * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. 63 * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8. 64 * @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed. 65 * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. 66 * @param[out] output Destination tensor. Data type supported: same as @p input. 67 * @param[in] conv_info Padding and stride information to use for the convolution. 68 * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. 69 * @param[in] act_info (Optional) Activation layer information in case of a fused activation. 70 * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). 71 */ 72 void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, 73 ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U)); 74 /** Initialize the function's source, destination, weights and convolution information. 75 * 76 * @param[in] compile_context The compile context to be used. 77 * @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/FP16/FP32. Data layout supported: NHWC, NCHW 78 * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. 79 * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8. 80 * @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed. 81 * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. 82 * @param[out] output Destination tensor. Data type supported: same as @p input. 83 * @param[in] conv_info Padding and stride information to use for the convolution. 84 * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. 85 * @param[in] act_info (Optional) Activation layer information in case of a fused activation. 86 * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). 87 */ 88 void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, 89 unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U)); 90 91 /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer 92 * 93 * @param[in] input Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/FP16/FP32. Data layout supported: NHWC, NCHW 94 * @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. 95 * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8. 96 * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed. 97 * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. 98 * @param[in] output Destination tensor. Data type supported: same as @p input. 99 * @param[in] conv_info Padding and stride information to use for the convolution. 100 * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. 101 * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported. 102 * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). 103 * 104 * @return a status 105 */ 106 static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, 107 ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U)); 108 109 // Inherited methods overriden: 110 void run() override; 111 void prepare() override; 112 113 private: 114 /** Static function to choose the best depthwise convolution function for @ref CLDepthwiseConvolutionLayer 115 * 116 * @param[in] input Source tensor info. Data type supported: QASYMM8/FP16/FP32. Data layout supported: NHWC, NCHW 117 * @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. 118 * Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8. 119 * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed. 120 * Data type supported: Same as @p input, S32 when input is QASYMM8. 121 * @param[in] output Destination tensor. Data type supported: same as @p input. 122 * @param[in] conv_info Padding and stride information to use for the convolution. 123 * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. 124 * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported. 125 * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). 126 * @param[in] gpu_target (Optional) GPU target to validate the kernel for. Defaults to midgard. 127 * 128 * @return a Depthwise Convolution Function 129 */ 130 static DepthwiseConvolutionFunction get_depthwiseconvolution_function(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, 131 const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, 132 ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U), GPUTarget gpu_target = GPUTarget::MIDGARD); 133 134 /** Basic function to execute a depthwise convolution for kernel size 3x3xC (when data layout NCHW) or Cx3x3 (when data layout NHWC). This function calls the following OpenCL kernels: 135 * 136 * -# @ref CLDepthwiseConvolutionLayer3x3NCHWKernel (if data_layout == NCHW) 137 * -# @ref CLDepthwiseConvolutionLayer3x3NHWCKernel (if data_layout == NHWC) 138 * -# @ref CLDepthwiseConvolutionLayerReshapeWeightsKernel (if data_layout == NHWC) 139 * -# @ref CLFillBorderKernel (if pad_x or pad_y > 0) 140 * 141 */ 142 class CLDepthwiseConvolutionLayerInternal3x3 : public IFunction 143 { 144 public: 145 /** Default constructor */ 146 CLDepthwiseConvolutionLayerInternal3x3(std::shared_ptr<IMemoryManager> memory_manager = nullptr); 147 /** Prevent instances of this class from being copied (As this class contains pointers) */ 148 CLDepthwiseConvolutionLayerInternal3x3(const CLDepthwiseConvolutionLayerInternal3x3 &) = delete; 149 /** Default move constructor */ 150 CLDepthwiseConvolutionLayerInternal3x3(CLDepthwiseConvolutionLayerInternal3x3 &&) = default; 151 /** Prevent instances of this class from being copied (As this class contains pointers) */ 152 CLDepthwiseConvolutionLayerInternal3x3 &operator=(const CLDepthwiseConvolutionLayerInternal3x3 &) = delete; 153 /** Default move assignment operator */ 154 CLDepthwiseConvolutionLayerInternal3x3 &operator=(CLDepthwiseConvolutionLayerInternal3x3 &&) = default; 155 /** Initialize the function's source, destination, conv and border_size. 156 * 157 * @param[in, out] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling). 158 * @param[in] weights Weights tensor. A 3D tensor with shape [3, 3, IFM]. 159 * Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8. 160 * @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed. 161 * Data type supported: Same as @p input. 162 * @param[out] output Destination tensor. Data type supported: same as @p input. 163 * @param[in] conv_info Padding and stride information to use for the convolution. 164 * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. 165 * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported. 166 * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). 167 */ 168 void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, 169 ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U)); 170 /** Initialize the function's source, destination, conv and border_size. 171 * 172 * @param[in] compile_context The compile context to be used. 173 * @param[in, out] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling). 174 * @param[in] weights Weights tensor. A 3D tensor with shape [3, 3, IFM]. 175 * Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8. 176 * @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed. 177 * Data type supported: Same as @p input. 178 * @param[out] output Destination tensor. Data type supported: same as @p input. 179 * @param[in] conv_info Padding and stride information to use for the convolution. 180 * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. 181 * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported. 182 * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). 183 */ 184 void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, 185 unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U)); 186 187 /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3 188 * 189 * @param[in] input Source tensor info. Data type supported: QASYMM8 for all layouts, F16/F32 for NCHW. 190 * @param[in] weights Weights tensor info. A 3D tensor with shape [3, 3, IFM]. 191 * Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8. 192 * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed. 193 * Data type supported: Same as @p input, S32 when input is QASYMM8. 194 * @param[in] output Destination tensor. Data type supported: same as @p input. 195 * @param[in] conv_info Padding and stride information to use for the convolution. 196 * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. 197 * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported. 198 * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). 199 * 200 * @return a status 201 */ 202 static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, 203 ActivationLayerInfo act_info = ActivationLayerInfo(), GPUTarget gpu_target = GPUTarget::MIDGARD, const Size2D &dilation = Size2D(1U, 1U)); 204 205 // Inherited methods overriden: 206 void run() override; 207 void prepare() override; 208 set_memory_group(std::shared_ptr<IMemoryManager> memory_manager)209 void set_memory_group(std::shared_ptr<IMemoryManager> memory_manager) 210 { 211 _memory_group = MemoryGroup(std::move(memory_manager)); 212 }; 213 214 private: 215 MemoryGroup _memory_group; 216 std::unique_ptr<ICLDepthwiseConvolutionLayer3x3Kernel> _kernel; 217 std::unique_ptr<CLFillBorderKernel> _border_handler; 218 CLPermute _permute_input_to_nchw; 219 CLPermute _permute_weights_to_nchw; 220 CLPermute _permute_output_to_nhwc; 221 std::unique_ptr<CLDepthwiseConvolutionLayerReshapeWeightsKernel> _reshape_weights; 222 CLTensor _permuted_input; 223 CLTensor _permuted_weights; 224 CLTensor _permuted_output; 225 CLTensor _output_multipliers; 226 CLTensor _output_shifts; 227 const ITensor *_original_weights; 228 const ITensor *_input; 229 const ITensor *_output; 230 bool _needs_permute; 231 bool _needs_weights_reshape; 232 bool _is_prepared; 233 bool _is_quantized; 234 }; 235 236 /** Basic function to execute a generic depthwise convolution. This function calls the following OpenCL kernels: 237 * 238 * -# @ref CLDepthwiseConvolutionLayerNativeKernel 239 * -# @ref CLPermute (x 3) if the data layout is NCHW 240 * 241 */ 242 class CLDepthwiseConvolutionLayerGeneric : public IFunction 243 { 244 public: 245 /** Default constructor */ 246 CLDepthwiseConvolutionLayerGeneric(std::shared_ptr<IMemoryManager> memory_manager = nullptr); 247 /** Prevent instances of this class from being copied (As this class contains pointers) */ 248 CLDepthwiseConvolutionLayerGeneric(const CLDepthwiseConvolutionLayerGeneric &) = delete; 249 /** Default move constructor */ 250 CLDepthwiseConvolutionLayerGeneric(CLDepthwiseConvolutionLayerGeneric &&) = default; 251 /** Prevent instances of this class from being copied (As this class contains pointers) */ 252 CLDepthwiseConvolutionLayerGeneric &operator=(const CLDepthwiseConvolutionLayerGeneric &) = delete; 253 /** Default move assignment operator */ 254 CLDepthwiseConvolutionLayerGeneric &operator=(CLDepthwiseConvolutionLayerGeneric &&) = default; 255 /** Initialize the function's source, destination, weights and convolution information. 256 * 257 * @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F32. (Written to only for border filling). 258 * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. 259 * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8. 260 * @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed. 261 * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. 262 * @param[out] output Destination tensor. Data type supported: same as @p input. 263 * @param[in] conv_info Padding and stride information to use for the convolution. 264 * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. 265 * @param[in] act_info (Optional) Activation layer information in case of a fused activation. 266 * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). 267 */ 268 void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, 269 unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U)); 270 /** Initialize the function's source, destination, weights and convolution information. 271 * 272 * @param[in] compile_context The compile context to be used. 273 * @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F32. (Written to only for border filling). 274 * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. 275 * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8. 276 * @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed. 277 * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. 278 * @param[out] output Destination tensor. Data type supported: same as @p input. 279 * @param[in] conv_info Padding and stride information to use for the convolution. 280 * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. 281 * @param[in] act_info (Optional) Activation layer information in case of a fused activation. 282 * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). 283 */ 284 void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, 285 unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U)); 286 287 /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayerGeneric 288 * 289 * @param[in] input Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F32. 290 * @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. 291 * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8. 292 * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed. 293 * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. 294 * @param[in] output Destination tensor. Data type supported: same as @p input. 295 * @param[in] conv_info Padding and stride information to use for the convolution. 296 * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. 297 * @param[in] act_info (Optional) Activation layer information in case of a fused activation. 298 * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). 299 * 300 * @return a status 301 */ 302 static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, 303 unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U)); 304 305 // Inherited methods overriden: 306 void run() override; 307 void prepare() override; 308 set_memory_group(std::shared_ptr<IMemoryManager> memory_manager)309 void set_memory_group(std::shared_ptr<IMemoryManager> memory_manager) 310 { 311 _memory_group = MemoryGroup(std::move(memory_manager)); 312 }; 313 314 private: 315 MemoryGroup _memory_group; 316 317 std::unique_ptr<CLDepthwiseConvolutionLayerNativeKernel> _dwc_native_kernel; 318 CLPermute _permute_input_to_nhwc; 319 CLPermute _permute_weights_to_nhwc; 320 CLPermute _permute_output_to_nchw; 321 322 CLTensor _permuted_input; 323 CLTensor _permuted_weights; 324 CLTensor _permuted_output; 325 CLTensor _output_multipliers; 326 CLTensor _output_shifts; 327 const ITensor *_original_weights; 328 const ITensor *_input; 329 const ITensor *_output; 330 331 bool _needs_permute; 332 bool _is_prepared; 333 bool _is_quantized; 334 }; 335 336 std::shared_ptr<IMemoryManager> _memory_manager; 337 338 DepthwiseConvolutionFunction _depth_conv_func; 339 CLDepthwiseConvolutionLayerInternal3x3 _func_3x3; 340 CLDepthwiseConvolutionLayerGeneric _func_generic; 341 }; 342 } // namespace arm_compute 343 #endif /*ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H */ 344