1 /* 2 * Copyright (c) 2016-2020 Arm Limited. 3 * 4 * SPDX-License-Identifier: MIT 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to 8 * deal in the Software without restriction, including without limitation the 9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 10 * sell copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in all 14 * copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 */ 24 #ifndef ARM_COMPUTE_NECONVOLUTIONKERNEL_H 25 #define ARM_COMPUTE_NECONVOLUTIONKERNEL_H 26 27 #include "src/core/NEON/INEKernel.h" 28 #include "src/core/NEON/INESimpleKernel.h" 29 30 #include <array> 31 #include <cstdint> 32 #include <vector> 33 34 namespace arm_compute 35 { 36 class ITensor; 37 38 /****************************************************************************************\ 39 * Square Convolution * 40 \****************************************************************************************/ 41 42 /** Interface for the kernel to run an arbitrary size convolution on a tensor. (Currently supports 3x3, 5x5, 7x7 and 9x9). 43 * The client can supply a convolution matrix \f$ C_{m,n} \f$. 44 * @f{eqnarray}{ 45 * k_0 &=& \frac{m}{2} \\ 46 * l_0 &=& \frac{n}{2} \\ 47 * sum &=& \sum_{k=0,l=0}^{k=m-1,l=n-1} input(x+k-k_0, y+l-l_0) C_{k,l} 48 * @f} 49 * 50 * @note The above equation for this function is similar to the default OpenCV Filter2D function, 51 * which actually computes a correlation and not a convolution. 52 * In case of a real convolution the convolution matrix should be flipped both horizontally and vertically. 53 */ 54 template <unsigned int matrix_size> 55 class NEConvolutionKernel : public INESimpleKernel 56 { 57 public: name()58 const char *name() const override 59 { 60 return "NEConvolutionKernel"; 61 } 62 /** Default constructor */ 63 NEConvolutionKernel(); 64 /** Prevent instances of this class from being copied (As this class contains pointers). */ 65 NEConvolutionKernel(const NEConvolutionKernel &) = delete; 66 /** Prevent instances of this class from being copied (As this class contains pointers). */ 67 NEConvolutionKernel &operator=(const NEConvolutionKernel &) = delete; 68 /** Allow instances of this class to be moved */ 69 NEConvolutionKernel(NEConvolutionKernel &&) = default; 70 /** Allow instances of this class to be moved */ 71 NEConvolutionKernel &operator=(NEConvolutionKernel &&) = default; 72 /** Default destructor */ 73 ~NEConvolutionKernel() = default; 74 /** Initialise the kernel's input, output and border mode. 75 * 76 * @param[in] input Source tensor. Data type supported: U8. 77 * @param[out] output Destination tensor. Data types supported: U8, S16. 78 * @param[in] conv Convolution matrix to apply to the input tensor. 79 * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. 80 * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 81 */ 82 void configure(const ITensor *input, ITensor *output, const int16_t *conv, uint32_t scale, bool border_undefined); 83 84 // Inherited methods overridden: 85 void run(const Window &window, const ThreadInfo &info) override; 86 BorderSize border_size() const override; 87 88 private: 89 template <typename OutputType> 90 void convolution(const Window &win); 91 92 protected: 93 uint32_t _scale; /**< scale of the convolution */ 94 std::array<int16_t, matrix_size *matrix_size> _convolution; /**< convolution matrix */ 95 }; 96 97 /** Interface for the kernel which applied a 3x3 convolution to a tensor.*/ 98 using NEConvolution3x3Kernel = NEConvolutionKernel<3>; 99 /** Interface for the kernel which applied a 5x5 convolution to a tensor.*/ 100 using NEConvolution5x5Kernel = NEConvolutionKernel<5>; 101 /** Interface for the kernel which applied a 7x7 convolution to a tensor.*/ 102 using NEConvolution7x7Kernel = NEConvolutionKernel<7>; 103 ///** Interface for the kernel which applied a 9x9 convolution to a tensor.*/ 104 using NEConvolution9x9Kernel = NEConvolutionKernel<9>; 105 106 /****************************************************************************************\ 107 * Separable Square Convolution * 108 \****************************************************************************************/ 109 110 /** Kernel for the Horizontal pass of a Separable Convolution */ 111 template <unsigned int matrix_size> 112 class NESeparableConvolutionHorKernel : public INESimpleKernel 113 { 114 public: name()115 const char *name() const override 116 { 117 return "NESeparableConvolutionHorKernel"; 118 } 119 /** Default constructor */ 120 NESeparableConvolutionHorKernel(); 121 /** Prevent instances of this class from being copied (As this class contains pointers). */ 122 NESeparableConvolutionHorKernel(const NESeparableConvolutionHorKernel &) = delete; 123 /** Prevent instances of this class from being copied (As this class contains pointers). */ 124 NESeparableConvolutionHorKernel &operator=(const NESeparableConvolutionHorKernel &) = delete; 125 /** Allow instances of this class to be moved */ 126 NESeparableConvolutionHorKernel(NESeparableConvolutionHorKernel &&) = default; 127 /** Allow instances of this class to be moved */ 128 NESeparableConvolutionHorKernel &operator=(NESeparableConvolutionHorKernel &&) = default; 129 /** Default destructor */ 130 ~NESeparableConvolutionHorKernel() = default; 131 132 /** Initialise the kernel's input, output and border mode. 133 * 134 * @param[in] input Source tensor. Data type supported: U8. 135 * @param[out] output Destination tensor. Data types supported: U16, S16, S32. 136 * @param[in] conv_row Convolution matrix to apply to the input tensor. 137 * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 138 */ 139 void configure(const ITensor *input, ITensor *output, const int16_t *conv_row, bool border_undefined); 140 141 // Inherited methods overridden: 142 void run(const Window &window, const ThreadInfo &info) override; 143 BorderSize border_size() const override; 144 145 private: 146 /** Apply the object's convolution to the given window of the input tensor.. 147 * 148 * @param[in] window Window to apply the convolution on. 149 */ 150 template <typename OutputType> 151 void convolve(const Window &window); 152 153 std::array<int16_t, matrix_size> _conv_row; /**< Convolution coefficients */ 154 BorderSize _border_size; /**< Border size */ 155 }; 156 157 /** Interface for the kernel which applied a 5x1 horizontal convolution to a tensor.*/ 158 using NESeparableConvolution5x5HorKernel = NESeparableConvolutionHorKernel<5>; 159 /** Interface for the kernel which applied a 7x1 horizontal convolution to a tensor.*/ 160 using NESeparableConvolution7x7HorKernel = NESeparableConvolutionHorKernel<7>; 161 /** Interface for the kernel which applied a 9x1 horizontal convolution to a tensor.*/ 162 using NESeparableConvolution9x9HorKernel = NESeparableConvolutionHorKernel<9>; 163 164 /** Kernel for the Vertical pass of a Separable Convolution */ 165 template <unsigned int matrix_size> 166 class NESeparableConvolutionVertKernel : public INESimpleKernel 167 { 168 public: name()169 const char *name() const override 170 { 171 return "NESeparableConvolutionVertKernel"; 172 } 173 /** Default constructor */ 174 NESeparableConvolutionVertKernel(); 175 /** Prevent instances of this class from being copied (As this class contains pointers). */ 176 NESeparableConvolutionVertKernel(const NESeparableConvolutionVertKernel &) = delete; 177 /** Prevent instances of this class from being copied (As this class contains pointers). */ 178 NESeparableConvolutionVertKernel &operator=(const NESeparableConvolutionVertKernel &) = delete; 179 /** Allow instances of this class to be moved */ 180 NESeparableConvolutionVertKernel(NESeparableConvolutionVertKernel &&) = default; 181 /** Allow instances of this class to be moved */ 182 NESeparableConvolutionVertKernel &operator=(NESeparableConvolutionVertKernel &&) = default; 183 /** Default destructor */ 184 ~NESeparableConvolutionVertKernel() = default; 185 186 /** Initialise the kernel's input, output and border mode. 187 * 188 * @param[in] input Source tensor. Data type supported: U16, S16, S32. 189 * @param[out] output Destination tensor, Data types supported: U8, S16. 190 * @param[in] conv_col Convolution matrix to apply to the input tensor. 191 * @param[in] scale Scale of the convolution matrix 192 * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 193 */ 194 void configure(const ITensor *input, ITensor *output, const int16_t *conv_col, uint32_t scale, bool border_undefined); 195 196 // Inherited methods overridden: 197 void run(const Window &window, const ThreadInfo &info) override; 198 BorderSize border_size() const override; 199 200 private: 201 /** Apply the object's convolution to the given window of the input tensor. 202 * This function is used if the intermediate values have been stored as U16. 203 * 204 * @param[in] win Window to apply the convolution on. 205 */ 206 template <typename OutputType> 207 void convolution_u16(const Window &win); 208 /** Apply the object's convolution to the given window of the input tensor. 209 * This function is used if the intermediate values have been stored as S16. 210 * 211 * @param[in] win Window to apply the convolution on. 212 */ 213 template <typename OutputType> 214 void convolution_s16(const Window &win); 215 /** Apply the object's convolution to the given window of the input tensor. 216 * This function is used if the intermediate values have been stored as S32. 217 * 218 * @param[in] win Window to apply the convolution on. 219 */ 220 template <typename OutputType> 221 void convolution_s32(const Window &win); 222 223 std::array<int16_t, matrix_size> _conv_col; /**< Convolution coefficients */ 224 uint32_t _scale; /**< Convolution's scale */ 225 }; 226 227 /** Interface for the kernel which applied a 1x5 vertical convolution to a tensor.*/ 228 using NESeparableConvolution5x5VertKernel = NESeparableConvolutionVertKernel<5>; 229 /** Interface for the kernel which applied a 1x7 vertical convolution to a tensor.*/ 230 using NESeparableConvolution7x7VertKernel = NESeparableConvolutionVertKernel<7>; 231 /** Interface for the kernel which applied a 1x9 vertical convolution to a tensor.*/ 232 using NESeparableConvolution9x9VertKernel = NESeparableConvolutionVertKernel<9>; 233 234 /****************************************************************************************\ 235 * Rectangle Convolution * 236 \****************************************************************************************/ 237 238 /** Kernel for the running convolution on a rectangle matrix. 239 * 240 * @note Supports combinations of 3,5,7 and 9. 241 */ 242 class NEConvolutionRectangleKernel : public INEKernel 243 { 244 public: name()245 const char *name() const override 246 { 247 return "NEConvolutionRectangleKernel"; 248 } 249 /** Default constructor */ 250 NEConvolutionRectangleKernel(); 251 /** Prevent instances of this class from being copied (As this class contains pointers) */ 252 NEConvolutionRectangleKernel(NEConvolutionRectangleKernel &) = delete; 253 /** Prevent instances of this class from being copied (As this class contains pointers) */ 254 NEConvolutionRectangleKernel &operator=(NEConvolutionRectangleKernel &) = delete; 255 /** Allow instances of this class to be moved */ 256 NEConvolutionRectangleKernel(NEConvolutionRectangleKernel &&) = default; 257 /** Allow instances of this class to be moved */ 258 NEConvolutionRectangleKernel &operator=(NEConvolutionRectangleKernel &&) = default; 259 /** Default destructor */ 260 ~NEConvolutionRectangleKernel() = default; 261 /** Initialise the kernel's input, output and border mode. 262 * 263 * @param[in] input Source tensor. Data type supported: U8. 264 * @param[out] output Destination tensor, Data types supported: U8, S16. 265 * @param[in] conv Convolution matrix to apply to the input tensor. 266 * @param[in] width Width of convolution matrix (Number of columns) 267 * @param[in] height Height of convolution matrix (Number of rows) 268 * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. 269 * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 270 */ 271 void configure(const ITensor *input, ITensor *output, const int16_t *conv, uint32_t width, uint32_t height, uint32_t scale, bool border_undefined); 272 273 // Inherited methods overridden: 274 void run(const Window &window, const ThreadInfo &info) override; 275 BorderSize border_size() const override; 276 277 private: 278 unsigned int get_index(uint32_t val); 279 /** Apply the object's convolution to the given window of the input tensor. 280 * 281 * @param[in] win Window to apply the convolution on. 282 */ 283 template <typename OutputType, unsigned int rows, unsigned int cols> 284 void convolution(const Window &win); 285 286 protected: 287 const ITensor *_input; /**< Input tensor */ 288 ITensor *_output; /**< Output tensor */ 289 uint32_t _scale; /**< Scale of the convolution */ 290 std::vector<int16_t> _convolution; /**< Convolution matrix */ 291 BorderSize _border_size; /**< Calculated border width */ 292 uint32_t _func_idx; /**< Index used to specify convolution function to be used */ 293 const static unsigned int _nr_supported_sizes 294 { 295 4 296 }; /**< Number of supported permutations */ 297 }; 298 } // namespace arm_compute 299 #endif /*ARM_COMPUTE_NECONVOLUTIONKERNEL_H */ 300