/*
 * Copyright (c) 2019 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef ARM_COMPUTE_NEDEPTHWISECONVOLUTIONASSEMBLYDISPATCH_H
#define ARM_COMPUTE_NEDEPTHWISECONVOLUTIONASSEMBLYDISPATCH_H

#include "arm_compute/runtime/IFunction.h"

#include "arm_compute/runtime/IMemoryManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/Tensor.h"

#include <memory>

namespace arm_compute
{
/** Depthwise convolution assembly kernel glue */
class NEDepthwiseConvolutionAssemblyDispatch : public IFunction
{
public:
    /** Default constructor
     *
     * @param[in,out] memory_manager Memory manager to use
     */
    NEDepthwiseConvolutionAssemblyDispatch(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEDepthwiseConvolutionAssemblyDispatch(const NEDepthwiseConvolutionAssemblyDispatch &) = delete;
    /** Default move constructor */
    NEDepthwiseConvolutionAssemblyDispatch(NEDepthwiseConvolutionAssemblyDispatch &&) = default;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEDepthwiseConvolutionAssemblyDispatch &operator=(const NEDepthwiseConvolutionAssemblyDispatch &) = delete;
    /** Default move assignment operator */
    NEDepthwiseConvolutionAssemblyDispatch &operator=(NEDepthwiseConvolutionAssemblyDispatch &&) = default;
    /** Default destructor */
    ~NEDepthwiseConvolutionAssemblyDispatch();
    /** Initialize the function's source, destination, kernels and border_size.
     *
     * @note Supports only NHWC format
     *
     * @param[in]  input            Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
     * @param[in]  weights          Weights tensor. These are 3D tensors with shape [W, H, IFM]. Data type supported: Same as @p input.
     * @param[in]  bias             (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
     *                              Data type supported: Same as @p input.
     * @param[out] output           Destination tensor. Data type supported: same as @p input.
     * @param[in]  conv_info        Padding and stride information to use for the convolution.
     * @param[in]  depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
     * @param[in]  act_info         (Optional) Activation layer information in case of a fused activation.
     * @param[in]  dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     */
    void configure(const ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output,
                   const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(),
                   const Size2D &dilation = Size2D(1, 1));
    /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionAssemblyDispatch
     *
     * @note Supports only NHWC format
     *
     * @param[in] input            Source tensor info. Data type supported: QASYMM8/F16/F32.
     * @param[in] weights          Weights tensor info. These are 3D tensors with shape [W, H, IFM]. Data type supported: Same as @p input.
     * @param[in] bias             (Optional) Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
     *                             Data type supported: Same as @p input.
     * @param[in] output           Destination tensor info. Data type supported: same as @p input.
     * @param[in] conv_info        Padding and stride information to use for the convolution.
     * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
     * @param[in] act_info         (Optional) Activation layer information in case of a fused activation.
     * @param[in] dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     *
     * @return An error status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *output,
                           const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(),
                           const Size2D &dilation = Size2D(1, 1));
    /** Check if the optimized kernel can be used for the given kernel sizes and strides
     *
     * @warning Even if this returns true the inputs and outputs might need to get permuted as the only layout supported is NHWC
     *
     * @param[in] input            Input tensor info.
     * @param[in] weights          Weights tensor info.
     * @param[in] conv_info        Convolution layer metadata.
     * @param[in] depth_multiplier (Optional) Depth multiplier to be used.
     * @param[in] dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     *
     * @return True if the assembly kernel could be used, else false. Note that transformations of input/output could be needed.
     */
    static bool is_optimized_supported(const ITensorInfo *input, const ITensorInfo *weights, PadStrideInfo conv_info, unsigned int depth_multiplier = 1, const Size2D &dilation = Size2D(1, 1));

    // Inherited methods overridden:
    void run() override;
    void prepare() override;

private:
    struct LocalImpl;

private:
    MemoryGroup                _memory_group;
    const ITensor             *_input;
    const ITensor             *_weights;
    const ITensor             *_bias;
    ITensor                   *_output;
    Tensor                     _packed_weights;
    Tensor                     _workspace;
    bool                       _is_prepared;
    std::unique_ptr<LocalImpl> _pImpl;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NEDEPTHWISECONVOLUTIONASSEMBLYDISPATCH_H */
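/*
 * Usage sketch (illustrative only, not part of the upstream header): a minimal example of
 * how this function is typically driven, based solely on the declarations above. It assumes
 * NHWC-laid-out, already-allocated tensors named `src`, `weights`, `bias` and `dst`, plus a
 * `PadStrideInfo conv_info`, created elsewhere; those names are placeholders, not symbols
 * defined by this file.
 *
 *     using namespace arm_compute;
 *
 *     // Only dispatch to the assembly kernel when the shapes, strides and layout are supported.
 *     if(NEDepthwiseConvolutionAssemblyDispatch::is_optimized_supported(src.info(), weights.info(), conv_info))
 *     {
 *         NEDepthwiseConvolutionAssemblyDispatch dwc;
 *         dwc.configure(&src, &weights, &bias, &dst, conv_info); // bias may be nullptr if not needed
 *         dwc.prepare();                                         // one-off weight packing / workspace allocation
 *         dwc.run();                                             // execute the depthwise convolution
 *     }
 */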