• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2016-2020 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef ARM_COMPUTE_CLCONVOLUTIONKERNEL_H
25 #define ARM_COMPUTE_CLCONVOLUTIONKERNEL_H
26 
27 #include "src/core/CL/ICLSimple2DKernel.h"
28 
29 #include <cstdint>
30 
31 namespace arm_compute
32 {
33 class ICLTensor; // Forward declaration: this header only refers to ICLTensor through pointers.
34 
35 /****************************************************************************************\
36  *                                    Square Convolution                                *
37 \****************************************************************************************/
38 
39 /** Interface for the kernel to run a square (matrix_size x matrix_size) convolution on a tensor. (Currently supports 3x3, 5x5, 7x7 and 9x9).
40  * The client can supply a convolution matrix \f$ C_{m,n} \f$.
41  * @f{eqnarray}{
42  *  k_0 &=& \frac{m}{2}  \\
43  *  l_0 &=& \frac{n}{2}  \\
44  *  sum &=& \sum_{k=0,l=0}^{k=m-1,l=n-1} input(x+k-k_0, y+l-l_0) C_{k,l}
45  *  @f}
46  *
47  * @note The above equation for this function is similar to the default OpenCV Filter2D function,
48  *       which actually computes a correlation and not a convolution.
49  *       For a true convolution, the convolution matrix should be flipped both horizontally and vertically.
50  */
51 template <unsigned int matrix_size>
52 class CLConvolutionKernel : public ICLSimple2DKernel
53 {
54 public:
55     /** Initialise the kernel's input, output and border mode.
56      *
57      * @param[in]  input            Source tensor. Data types supported: U8.
58      * @param[out] output           Destination tensor. Data types supported: U8, S16.
59      * @param[in]  conv             Convolution matrix to apply to the input tensor.
60      * @param[in]  scale            Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
61      * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
62      */
63     void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined);
64     /** Initialise the kernel's input, output and border mode, using an explicitly supplied compile context.
65      *
66      * @param[in]  compile_context  The compile context to be used.
67      * @param[in]  input            Source tensor. Data types supported: U8.
68      * @param[out] output           Destination tensor. Data types supported: U8, S16.
69      * @param[in]  conv             Convolution matrix to apply to the input tensor.
70      * @param[in]  scale            Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
71      * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
72      */
73     void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined);
74 
75     // Inherited methods overridden:
76     BorderSize border_size() const override;
77 };
78 
79 /** Interface for the kernel which applies a 3x3 convolution to a tensor (CLConvolutionKernel with matrix_size = 3). */
80 using CLConvolution3x3Kernel = CLConvolutionKernel<3>;
81 /** Interface for the kernel which applies a 5x5 convolution to a tensor (CLConvolutionKernel with matrix_size = 5). */
82 using CLConvolution5x5Kernel = CLConvolutionKernel<5>;
83 /** Interface for the kernel which applies a 7x7 convolution to a tensor (CLConvolutionKernel with matrix_size = 7). */
84 using CLConvolution7x7Kernel = CLConvolutionKernel<7>;
85 /** Interface for the kernel which applies a 9x9 convolution to a tensor (CLConvolutionKernel with matrix_size = 9). */
86 using CLConvolution9x9Kernel = CLConvolutionKernel<9>;
87 
88 /****************************************************************************************\
89  *                              Separable Square Convolution                            *
90 \****************************************************************************************/
91 
92 /** Kernel for the horizontal pass of a separable convolution. Currently supports 5x5, 7x7, 9x9 */
93 template <unsigned int matrix_size>
94 class CLSeparableConvolutionHorKernel : public ICLSimple2DKernel
95 {
96 public:
97     /** Default Constructor */
98     CLSeparableConvolutionHorKernel();
99     /** Initialise the kernel's input, output and border mode.
100      *
101      * @param[in]  input            Source tensor. Data types supported: U8.
102      * @param[out] output           Destination tensor. Data types supported: S16. (NOTE(review): the compile_context overload below lists U16/S16/S32 - confirm which is correct.)
103      * @param[in]  conv             Convolution matrix to apply to the input tensor.
104      * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
105      */
106     void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, bool border_undefined);
107     /** Initialise the kernel's input, output and border mode, using an explicitly supplied compile context.
108      *
109      * @param[in]  compile_context  The compile context to be used.
110      * @param[in]  input            Source tensor. Data types supported: U8.
111      * @param[out] output           Destination tensor. Data types supported: U16/S16/S32.
112      * @param[in]  conv             Convolution matrix to apply to the input tensor.
113      * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
114      */
115     void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, bool border_undefined);
116 
117     // Inherited methods overridden:
118     BorderSize border_size() const override;
119 
120 private:
121     BorderSize _border_size; /**< Border size returned by border_size() - presumably; definition not in this header */
122 };
123 
124 /** Interface for the kernel which applies the horizontal pass of a 5x5 separable convolution to a tensor. */
125 using CLSeparableConvolution5x5HorKernel = CLSeparableConvolutionHorKernel<5>;
126 /** Interface for the kernel which applies the horizontal pass of a 7x7 separable convolution to a tensor. */
127 using CLSeparableConvolution7x7HorKernel = CLSeparableConvolutionHorKernel<7>;
128 /** Interface for the kernel which applies the horizontal pass of a 9x9 separable convolution to a tensor. */
129 using CLSeparableConvolution9x9HorKernel = CLSeparableConvolutionHorKernel<9>;
130 
131 /** Kernel for the vertical pass of a separable convolution. Currently supports 5x5, 7x7, 9x9 */
132 template <unsigned int matrix_size>
133 class CLSeparableConvolutionVertKernel : public ICLSimple2DKernel
134 {
135 public:
136     /** Initialise the kernel's input, output and border mode.
137      *
138      * @param[in]  input            Source tensor. Data types supported: U16/S16/S32.
139      * @param[out] output           Destination tensor. Data types supported: U8, S16.
140      * @param[in]  conv             Convolution matrix to apply to the input tensor.
141      * @param[in]  scale            Scale of the convolution matrix.
142      * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
143      * @param[in]  data_type        Data type to use for the intermediate result. Defaults to S32. @sa data_type_for_convolution
144      */
145     void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined, DataType data_type = DataType::S32);
146     /** Initialise the kernel's input, output and border mode, using an explicitly supplied compile context.
147      *
148      * @param[in]  compile_context  The compile context to be used.
149      * @param[in]  input            Source tensor. Data types supported: U16/S16/S32.
150      * @param[out] output           Destination tensor. Data types supported: U8, S16.
151      * @param[in]  conv             Convolution matrix to apply to the input tensor.
152      * @param[in]  scale            Scale of the convolution matrix.
153      * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
154      * @param[in]  data_type        Data type to use for the intermediate result. Defaults to S32. @sa data_type_for_convolution
155      */
156     void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined, DataType data_type = DataType::S32);
157 
158     // Inherited methods overridden:
159     BorderSize border_size() const override;
160 };
161 
162 /** Interface for the kernel which applies the vertical pass of a 5x5 separable convolution to a tensor. */
163 using CLSeparableConvolution5x5VertKernel = CLSeparableConvolutionVertKernel<5>;
164 /** Interface for the kernel which applies the vertical pass of a 7x7 separable convolution to a tensor. */
165 using CLSeparableConvolution7x7VertKernel = CLSeparableConvolutionVertKernel<7>;
166 /** Interface for the kernel which applies the vertical pass of a 9x9 separable convolution to a tensor. */
167 using CLSeparableConvolution9x9VertKernel = CLSeparableConvolutionVertKernel<9>;
168 
169 /****************************************************************************************\
170  *                                 Rectangle Convolution                                *
171 \****************************************************************************************/
172 
173 /** Kernel for running a convolution with a rectangular matrix.
174  *
175  * @note Supports matrix width/height combinations of 3, 5, 7 and 9.
176  */
177 class CLConvolutionRectangleKernel : public ICLKernel
178 {
179 public:
180     /** Default constructor */
181     CLConvolutionRectangleKernel();
182     /** Prevent instances of this class from being copied (As this class contains pointers) */
183     CLConvolutionRectangleKernel(const CLConvolutionRectangleKernel &) = delete;
184     /** Prevent instances of this class from being copied (As this class contains pointers) */
185     CLConvolutionRectangleKernel &operator=(const CLConvolutionRectangleKernel &) = delete;
186     /** Allow instances of this class to be moved */
187     CLConvolutionRectangleKernel(CLConvolutionRectangleKernel &&) = default;
188     /** Allow instances of this class to be moved */
189     CLConvolutionRectangleKernel &operator=(CLConvolutionRectangleKernel &&) = default;
190     /** Initialise the kernel's input, output and border mode.
191      *
192      * @param[in]  input            Source tensor. Data types supported: U8.
193      * @param[out] output           Destination tensor. Data types supported: U8, S16.
194      * @param[in]  conv             Convolution matrix to apply to the input tensor.
195      * @param[in]  width            Width of convolution matrix (Number of columns)
196      * @param[in]  height           Height of convolution matrix (Number of rows)
197      * @param[in]  scale            Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
198      * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
199      */
200     void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t width, uint32_t height, uint32_t scale, bool border_undefined);
201     /** Initialise the kernel's input, output and border mode, using an explicitly supplied compile context.
202      *
203      * @param[in]  compile_context  The compile context to be used.
204      * @param[in]  input            Source tensor. Data types supported: U8.
205      * @param[out] output           Destination tensor. Data types supported: U8, S16.
206      * @param[in]  conv             Convolution matrix to apply to the input tensor.
207      * @param[in]  width            Width of convolution matrix (Number of columns)
208      * @param[in]  height           Height of convolution matrix (Number of rows)
209      * @param[in]  scale            Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
210      * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
211      */
212     void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t width, uint32_t height, uint32_t scale, bool border_undefined);
213 
214     // Inherited methods overridden:
215     void run(const Window &window, cl::CommandQueue &queue) override;
216     BorderSize border_size() const override;
217 
218 private:
219     BorderSize       _border_size; /**< Border size */
220     const ICLTensor *_input;       /**< Input tensor pointer */
221     ICLTensor       *_output;      /**< Output tensor pointer */
222 };
223 } // namespace arm_compute
224 #endif /*ARM_COMPUTE_CLCONVOLUTIONKERNEL_H */
225