• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2016-2020 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef ARM_COMPUTE_NECONVOLUTIONKERNEL_H
25 #define ARM_COMPUTE_NECONVOLUTIONKERNEL_H
26 
27 #include "src/core/NEON/INEKernel.h"
28 #include "src/core/NEON/INESimpleKernel.h"
29 
30 #include <array>
31 #include <cstdint>
32 #include <vector>
33 
34 namespace arm_compute
35 {
// Forward declaration: this header only refers to ITensor through pointers.
class ITensor;
37 
38 /****************************************************************************************\
39  *                                    Square Convolution                                *
40 \****************************************************************************************/
41 
42 /** Interface for the kernel to run an arbitrary size convolution on a tensor. (Currently supports 3x3, 5x5, 7x7 and 9x9).
43  * The client can supply a convolution matrix \f$ C_{m,n} \f$.
44  * @f{eqnarray}{
45  *  k_0 &=& \frac{m}{2}  \\
46  *  l_0 &=& \frac{n}{2}  \\
47  *  sum &=& \sum_{k=0,l=0}^{k=m-1,l=n-1} input(x+k-k_0, y+l-l_0) C_{k,l}
48  *  @f}
49  *
50  * @note The above equation for this function is similar to the default OpenCV Filter2D function,
51  *       which actually computes a correlation and not a convolution.
52  *       In case of a real convolution the convolution matrix should be flipped both horizontally and vertically.
53  */
54 template <unsigned int matrix_size>
55 class NEConvolutionKernel : public INESimpleKernel
56 {
57 public:
name()58     const char *name() const override
59     {
60         return "NEConvolutionKernel";
61     }
62     /** Default constructor */
63     NEConvolutionKernel();
64     /** Prevent instances of this class from being copied (As this class contains pointers). */
65     NEConvolutionKernel(const NEConvolutionKernel &) = delete;
66     /** Prevent instances of this class from being copied (As this class contains pointers). */
67     NEConvolutionKernel &operator=(const NEConvolutionKernel &) = delete;
68     /** Allow instances of this class to be moved */
69     NEConvolutionKernel(NEConvolutionKernel &&) = default;
70     /** Allow instances of this class to be moved */
71     NEConvolutionKernel &operator=(NEConvolutionKernel &&) = default;
72     /** Default destructor */
73     ~NEConvolutionKernel() = default;
74     /** Initialise the kernel's input, output and border mode.
75      *
76      * @param[in]  input            Source tensor. Data type supported: U8.
77      * @param[out] output           Destination tensor. Data types supported: U8, S16.
78      * @param[in]  conv             Convolution matrix to apply to the input tensor.
79      * @param[in]  scale            Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
80      * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
81      */
82     void configure(const ITensor *input, ITensor *output, const int16_t *conv, uint32_t scale, bool border_undefined);
83 
84     // Inherited methods overridden:
85     void run(const Window &window, const ThreadInfo &info) override;
86     BorderSize border_size() const override;
87 
88 private:
89     template <typename OutputType>
90     void convolution(const Window &win);
91 
92 protected:
93     uint32_t _scale;                                             /**< scale of the convolution */
94     std::array<int16_t, matrix_size *matrix_size> _convolution;  /**< convolution matrix */
95 };
96 
97 /** Interface for the kernel which applied a 3x3 convolution to a tensor.*/
98 using NEConvolution3x3Kernel = NEConvolutionKernel<3>;
99 /** Interface for the kernel which applied a 5x5 convolution to a tensor.*/
100 using NEConvolution5x5Kernel = NEConvolutionKernel<5>;
101 /** Interface for the kernel which applied a 7x7 convolution to a tensor.*/
102 using NEConvolution7x7Kernel = NEConvolutionKernel<7>;
103 ///** Interface for the kernel which applied a 9x9 convolution to a tensor.*/
104 using NEConvolution9x9Kernel = NEConvolutionKernel<9>;
105 
106 /****************************************************************************************\
107  *                              Separable Square Convolution                            *
108 \****************************************************************************************/
109 
110 /** Kernel for the Horizontal pass of a Separable Convolution */
111 template <unsigned int matrix_size>
112 class NESeparableConvolutionHorKernel : public INESimpleKernel
113 {
114 public:
name()115     const char *name() const override
116     {
117         return "NESeparableConvolutionHorKernel";
118     }
119     /** Default constructor */
120     NESeparableConvolutionHorKernel();
121     /** Prevent instances of this class from being copied (As this class contains pointers). */
122     NESeparableConvolutionHorKernel(const NESeparableConvolutionHorKernel &) = delete;
123     /** Prevent instances of this class from being copied (As this class contains pointers). */
124     NESeparableConvolutionHorKernel &operator=(const NESeparableConvolutionHorKernel &) = delete;
125     /** Allow instances of this class to be moved */
126     NESeparableConvolutionHorKernel(NESeparableConvolutionHorKernel &&) = default;
127     /** Allow instances of this class to be moved */
128     NESeparableConvolutionHorKernel &operator=(NESeparableConvolutionHorKernel &&) = default;
129     /** Default destructor */
130     ~NESeparableConvolutionHorKernel() = default;
131 
132     /** Initialise the kernel's input, output and border mode.
133      *
134      * @param[in]  input            Source tensor. Data type supported: U8.
135      * @param[out] output           Destination tensor. Data types supported: U16, S16, S32.
136      * @param[in]  conv_row         Convolution matrix to apply to the input tensor.
137      * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
138      */
139     void configure(const ITensor *input, ITensor *output, const int16_t *conv_row, bool border_undefined);
140 
141     // Inherited methods overridden:
142     void run(const Window &window, const ThreadInfo &info) override;
143     BorderSize border_size() const override;
144 
145 private:
146     /** Apply the object's convolution to the given window of the input tensor..
147      *
148      * @param[in] window Window to apply the convolution on.
149      */
150     template <typename OutputType>
151     void convolve(const Window &window);
152 
153     std::array<int16_t, matrix_size> _conv_row; /**< Convolution coefficients */
154     BorderSize _border_size;                    /**< Border size */
155 };
156 
157 /** Interface for the kernel which applied a 5x1 horizontal convolution to a tensor.*/
158 using NESeparableConvolution5x5HorKernel = NESeparableConvolutionHorKernel<5>;
159 /** Interface for the kernel which applied a 7x1 horizontal convolution to a tensor.*/
160 using NESeparableConvolution7x7HorKernel = NESeparableConvolutionHorKernel<7>;
161 /** Interface for the kernel which applied a 9x1 horizontal convolution to a tensor.*/
162 using NESeparableConvolution9x9HorKernel = NESeparableConvolutionHorKernel<9>;
163 
164 /** Kernel for the Vertical pass of a Separable Convolution */
165 template <unsigned int matrix_size>
166 class NESeparableConvolutionVertKernel : public INESimpleKernel
167 {
168 public:
name()169     const char *name() const override
170     {
171         return "NESeparableConvolutionVertKernel";
172     }
173     /** Default constructor */
174     NESeparableConvolutionVertKernel();
175     /** Prevent instances of this class from being copied (As this class contains pointers). */
176     NESeparableConvolutionVertKernel(const NESeparableConvolutionVertKernel &) = delete;
177     /** Prevent instances of this class from being copied (As this class contains pointers). */
178     NESeparableConvolutionVertKernel &operator=(const NESeparableConvolutionVertKernel &) = delete;
179     /** Allow instances of this class to be moved */
180     NESeparableConvolutionVertKernel(NESeparableConvolutionVertKernel &&) = default;
181     /** Allow instances of this class to be moved */
182     NESeparableConvolutionVertKernel &operator=(NESeparableConvolutionVertKernel &&) = default;
183     /** Default destructor */
184     ~NESeparableConvolutionVertKernel() = default;
185 
186     /** Initialise the kernel's input, output and border mode.
187      *
188      * @param[in]  input            Source tensor. Data type supported: U16, S16, S32.
189      * @param[out] output           Destination tensor, Data types supported: U8, S16.
190      * @param[in]  conv_col         Convolution matrix to apply to the input tensor.
191      * @param[in]  scale            Scale of the convolution matrix
192      * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
193      */
194     void configure(const ITensor *input, ITensor *output, const int16_t *conv_col, uint32_t scale, bool border_undefined);
195 
196     // Inherited methods overridden:
197     void run(const Window &window, const ThreadInfo &info) override;
198     BorderSize border_size() const override;
199 
200 private:
201     /** Apply the object's convolution to the given window of the input tensor.
202      *  This function is used if the intermediate values have been stored as U16.
203      *
204      * @param[in] win Window to apply the convolution on.
205      */
206     template <typename OutputType>
207     void convolution_u16(const Window &win);
208     /** Apply the object's convolution to the given window of the input tensor.
209      *  This function is used if the intermediate values have been stored as S16.
210      *
211      * @param[in] win Window to apply the convolution on.
212      */
213     template <typename OutputType>
214     void convolution_s16(const Window &win);
215     /** Apply the object's convolution to the given window of the input tensor.
216      *  This function is used if the intermediate values have been stored as S32.
217      *
218      * @param[in] win Window to apply the convolution on.
219      */
220     template <typename OutputType>
221     void convolution_s32(const Window &win);
222 
223     std::array<int16_t, matrix_size> _conv_col; /**< Convolution coefficients */
224     uint32_t _scale;                            /**< Convolution's scale */
225 };
226 
227 /** Interface for the kernel which applied a 1x5 vertical convolution to a tensor.*/
228 using NESeparableConvolution5x5VertKernel = NESeparableConvolutionVertKernel<5>;
229 /** Interface for the kernel which applied a 1x7 vertical convolution to a tensor.*/
230 using NESeparableConvolution7x7VertKernel = NESeparableConvolutionVertKernel<7>;
231 /** Interface for the kernel which applied a 1x9 vertical convolution to a tensor.*/
232 using NESeparableConvolution9x9VertKernel = NESeparableConvolutionVertKernel<9>;
233 
234 /****************************************************************************************\
235  *                                 Rectangle Convolution                                *
236 \****************************************************************************************/
237 
238 /** Kernel for the running convolution on a rectangle matrix.
239  *
240  * @note Supports combinations of 3,5,7 and 9.
241  */
242 class NEConvolutionRectangleKernel : public INEKernel
243 {
244 public:
name()245     const char *name() const override
246     {
247         return "NEConvolutionRectangleKernel";
248     }
249     /** Default constructor */
250     NEConvolutionRectangleKernel();
251     /** Prevent instances of this class from being copied (As this class contains pointers) */
252     NEConvolutionRectangleKernel(NEConvolutionRectangleKernel &) = delete;
253     /** Prevent instances of this class from being copied (As this class contains pointers) */
254     NEConvolutionRectangleKernel &operator=(NEConvolutionRectangleKernel &) = delete;
255     /** Allow instances of this class to be moved */
256     NEConvolutionRectangleKernel(NEConvolutionRectangleKernel &&) = default;
257     /** Allow instances of this class to be moved */
258     NEConvolutionRectangleKernel &operator=(NEConvolutionRectangleKernel &&) = default;
259     /** Default destructor */
260     ~NEConvolutionRectangleKernel() = default;
261     /** Initialise the kernel's input, output and border mode.
262      *
263      * @param[in]  input            Source tensor. Data type supported: U8.
264      * @param[out] output           Destination tensor, Data types supported: U8, S16.
265      * @param[in]  conv             Convolution matrix to apply to the input tensor.
266      * @param[in]  width            Width of convolution matrix (Number of columns)
267      * @param[in]  height           Height of convolution matrix (Number of rows)
268      * @param[in]  scale            Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
269      * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
270      */
271     void configure(const ITensor *input, ITensor *output, const int16_t *conv, uint32_t width, uint32_t height, uint32_t scale, bool border_undefined);
272 
273     // Inherited methods overridden:
274     void run(const Window &window, const ThreadInfo &info) override;
275     BorderSize border_size() const override;
276 
277 private:
278     unsigned int get_index(uint32_t val);
279     /** Apply the object's convolution to the given window of the input tensor.
280      *
281      * @param[in] win Window to apply the convolution on.
282      */
283     template <typename OutputType, unsigned int rows, unsigned int cols>
284     void convolution(const Window &win);
285 
286 protected:
287     const ITensor            *_input;       /**< Input tensor */
288     ITensor                  *_output;      /**< Output tensor */
289     uint32_t                  _scale;       /**< Scale of the convolution */
290     std::vector<int16_t>      _convolution; /**< Convolution matrix */
291     BorderSize                _border_size; /**< Calculated border width */
292     uint32_t                  _func_idx;    /**< Index used to specify convolution function to be used */
293     const static unsigned int _nr_supported_sizes
294     {
295         4
296     }; /**< Number of supported permutations */
297 };
298 } // namespace arm_compute
299 #endif /*ARM_COMPUTE_NECONVOLUTIONKERNEL_H */
300