1 /* 2 * Copyright (c) 2016-2020 Arm Limited. 3 * 4 * SPDX-License-Identifier: MIT 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to 8 * deal in the Software without restriction, including without limitation the 9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 10 * sell copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in all 14 * copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 */ 24 #ifndef ARM_COMPUTE_CLCONVOLUTIONKERNEL_H 25 #define ARM_COMPUTE_CLCONVOLUTIONKERNEL_H 26 27 #include "src/core/CL/ICLSimple2DKernel.h" 28 29 #include <cstdint> 30 31 namespace arm_compute 32 { 33 class ICLTensor; 34 35 /****************************************************************************************\ 36 * Square Convolution * 37 \****************************************************************************************/ 38 39 /** Interface for the kernel to run an arbitrary size convolution on a tensor. (Currently supports 3x3, 5x5, 7x7 and 9x9). 40 * The client can supply a convolution matrix \f$ C_{m,n} \f$. 41 * @f{eqnarray}{ 42 * k_0 &=& \frac{m}{2} \\ 43 * l_0 &=& \frac{n}{2} \\ 44 * sum &=& \sum_{k=0,l=0}^{k=m-1,l=n-1} input(x+k-k_0, y+l-l_0) C_{k,l} 45 * @f} 46 * 47 * @note The above equation for this function is similar to the default OpenCV Filter2D function, 48 * which actually computes a correlation and not a convolution. 49 * In case of a real convolution the convolution matrix should be flipped both horizontally and vertically. 50 */ 51 template <unsigned int matrix_size> 52 class CLConvolutionKernel : public ICLSimple2DKernel 53 { 54 public: 55 /** Initialise the kernel's input, output and border mode. 56 * 57 * @param[in] input Source tensor. Data types supported: U8. 58 * @param[out] output Destination tensor, Data types supported: U8, S16. 59 * @param[in] conv Convolution matrix to apply to the input tensor. 60 * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. 61 * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 62 */ 63 void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined); 64 /** Initialise the kernel's input, output and border mode. 65 * 66 * @param[in] compile_context The compile context to be used. 67 * @param[in] input Source tensor. Data types supported: U8. 68 * @param[out] output Destination tensor, Data types supported: U8, S16. 69 * @param[in] conv Convolution matrix to apply to the input tensor. 70 * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. 71 * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 72 */ 73 void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined); 74 75 // Inherited methods overridden: 76 BorderSize border_size() const override; 77 }; 78 79 /** Interface for the kernel which applies a 3x3 convolution to a tensor. */ 80 using CLConvolution3x3Kernel = CLConvolutionKernel<3>; 81 /** Interface for the kernel which applies a 5x5 convolution to a tensor. */ 82 using CLConvolution5x5Kernel = CLConvolutionKernel<5>; 83 /** Interface for the kernel which applies a 7x7 convolution to a tensor. */ 84 using CLConvolution7x7Kernel = CLConvolutionKernel<7>; 85 /** Interface for the kernel which applies a 9x9 convolution to a tensor. */ 86 using CLConvolution9x9Kernel = CLConvolutionKernel<9>; 87 88 /****************************************************************************************\ 89 * Separable Square Convolution * 90 \****************************************************************************************/ 91 92 /** Kernel for the Horizontal pass of a Separable Convolution. Currently support 5x5, 7x7, 9x9 */ 93 template <unsigned int matrix_size> 94 class CLSeparableConvolutionHorKernel : public ICLSimple2DKernel 95 { 96 public: 97 /** Default Constructor */ 98 CLSeparableConvolutionHorKernel(); 99 /** Initialise the kernel's input, output and border mode. 100 * 101 * @param[in] input Source tensor. Data types supported: U8. 102 * @param[out] output Destination tensor, Data types supported: S16. 103 * @param[in] conv Convolution matrix to apply to the input tensor. 104 * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 105 */ 106 void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, bool border_undefined); 107 /** Initialise the kernel's input, output and border mode. 108 * 109 * @param[in] compile_context The compile context to be used. 110 * @param[in] input Source tensor. Data types supported: U8. 111 * @param[out] output Destination tensor, Data types supported: U16/S16/S32. 112 * @param[in] conv Convolution matrix to apply to the input tensor. 113 * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 114 */ 115 void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, bool border_undefined); 116 117 // Inherited methods overridden: 118 BorderSize border_size() const override; 119 120 private: 121 BorderSize _border_size; /**< Border size */ 122 }; 123 124 /** Interface for the kernel which applies a horizontal pass of 5x5 convolution to a tensor. */ 125 using CLSeparableConvolution5x5HorKernel = CLSeparableConvolutionHorKernel<5>; 126 /** Interface for the kernel which applies a horizontal pass of 7x7 convolution to a tensor. */ 127 using CLSeparableConvolution7x7HorKernel = CLSeparableConvolutionHorKernel<7>; 128 /** Interface for the kernel which applies a horizontal pass of 9x9 convolution to a tensor. */ 129 using CLSeparableConvolution9x9HorKernel = CLSeparableConvolutionHorKernel<9>; 130 131 /** Kernel for the Vertical pass of a Separable Convolution. Currently supports 5x5, 7x7, 9x9 */ 132 template <unsigned int matrix_size> 133 class CLSeparableConvolutionVertKernel : public ICLSimple2DKernel 134 { 135 public: 136 /** Initialise the kernel's input, output and border mode. 137 * 138 * @param[in] input Source tensor. Data types supported: U16/S16/S32. 139 * @param[out] output Destination tensor, Data types supported: U8, S16. 140 * @param[in] conv Convolution matrix to apply to the input tensor. 141 * @param[in] scale Scale of the convolution matrix. 142 * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 143 * @param[in] data_type Data type to use for intermeidate result. @sa data_type_for_convolution 144 */ 145 void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined, DataType data_type = DataType::S32); 146 /** Initialise the kernel's input, output and border mode. 147 * 148 * @param[in] compile_context The compile context to be used. 149 * @param[in] input Source tensor. Data types supported: U16/S16/S32. 150 * @param[out] output Destination tensor, Data types supported: U8, S16. 151 * @param[in] conv Convolution matrix to apply to the input tensor. 152 * @param[in] scale Scale of the convolution matrix. 153 * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 154 * @param[in] data_type Data type to use for intermeidate result. @sa data_type_for_convolution 155 */ 156 void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined, DataType data_type = DataType::S32); 157 158 // Inherited methods overridden: 159 BorderSize border_size() const override; 160 }; 161 162 /** Interface for the kernel which applies a vertical pass of 5x5 convolution to a tensor. */ 163 using CLSeparableConvolution5x5VertKernel = CLSeparableConvolutionVertKernel<5>; 164 /** Interface for the kernel which applies a vertical pass of 7x7 convolution to a tensor. */ 165 using CLSeparableConvolution7x7VertKernel = CLSeparableConvolutionVertKernel<7>; 166 /** Interface for the kernel which applies a vertical pass of 9x9 convolution to a tensor. */ 167 using CLSeparableConvolution9x9VertKernel = CLSeparableConvolutionVertKernel<9>; 168 169 /****************************************************************************************\ 170 * Rectangle Convolution * 171 \****************************************************************************************/ 172 173 /** Kernel for the running convolution on a rectangle matrix. 174 * 175 * @note Supports combinations of 3,5,7 and 9. 176 */ 177 class CLConvolutionRectangleKernel : public ICLKernel 178 { 179 public: 180 /** Default constructor */ 181 CLConvolutionRectangleKernel(); 182 /** Prevent instances of this class from being copied (As this class contains pointers) */ 183 CLConvolutionRectangleKernel(const CLConvolutionRectangleKernel &) = delete; 184 /** Prevent instances of this class from being copied (As this class contains pointers) */ 185 CLConvolutionRectangleKernel &operator=(const CLConvolutionRectangleKernel &) = delete; 186 /** Allow instances of this class to be moved */ 187 CLConvolutionRectangleKernel(CLConvolutionRectangleKernel &&) = default; 188 /** Allow instances of this class to be moved */ 189 CLConvolutionRectangleKernel &operator=(CLConvolutionRectangleKernel &&) = default; 190 /** Initialise the kernel's input, output and border mode. 191 * 192 * @param[in] input Source tensor. Data types supported: U8. 193 * @param[out] output Destination tensor, Data types supported: U8, S16. 194 * @param[in] conv Convolution matrix to apply to the input tensor. 195 * @param[in] width Width of convolution matrix (Number of columns) 196 * @param[in] height Height of convolution matrix (Number of rows) 197 * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. 198 * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 199 */ 200 void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t width, uint32_t height, uint32_t scale, bool border_undefined); 201 /** Initialise the kernel's input, output and border mode. 202 * 203 * @param[in] compile_context The compile context to be used. 204 * @param[in] input Source tensor. Data types supported: U8. 205 * @param[out] output Destination tensor, Data types supported: U8, S16. 206 * @param[in] conv Convolution matrix to apply to the input tensor. 207 * @param[in] width Width of convolution matrix (Number of columns) 208 * @param[in] height Height of convolution matrix (Number of rows) 209 * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. 210 * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 211 */ 212 void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t width, uint32_t height, uint32_t scale, bool border_undefined); 213 214 // Inherited methods overridden: 215 void run(const Window &window, cl::CommandQueue &queue) override; 216 BorderSize border_size() const override; 217 218 private: 219 BorderSize _border_size; 220 const ICLTensor *_input; 221 ICLTensor *_output; 222 }; 223 } // namespace arm_compute 224 #endif /*ARM_COMPUTE_CLCONVOLUTIONKERNEL_H */ 225