• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2017-2020 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef ARM_COMPUTE_CLGEMMLOWREDUCTIONKERNEL_H
25 #define ARM_COMPUTE_CLGEMMLOWREDUCTIONKERNEL_H
26 
27 #include "src/core/CL/ICLKernel.h"
28 
29 namespace arm_compute
30 {
31 class ICLTensor;
32 struct GEMMLowpReductionKernelInfo;
33 
34 /** Common interface for all OpenCL reduction kernels */
35 class ICLGEMMLowpReductionKernel : public ICLKernel
36 {
37 public:
38     /** Constructor */
39     ICLGEMMLowpReductionKernel();
40     /** Prevent instances of this class from being copied (As this class contains pointers)*/
41     ICLGEMMLowpReductionKernel(const ICLGEMMLowpReductionKernel &) = delete;
42     /** Prevent instances of this class from being copied (As this class contains pointers)*/
43     ICLGEMMLowpReductionKernel &operator=(const ICLGEMMLowpReductionKernel &) = delete;
44     /** Allow instances of this class to be moved */
45     ICLGEMMLowpReductionKernel(ICLGEMMLowpReductionKernel &&) = default;
46     /** Allow instances of this class to be moved */
47     ICLGEMMLowpReductionKernel &operator=(ICLGEMMLowpReductionKernel &&) = default;
48 
49     /** Initialise the kernel's input and output.
50      *
51      * @param[in]  input  Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8.
52      * @param[out] output Output row-vector of sums of all the entries in each row/col of input tensor. Data type supported: S32
53      * @param[in]  info   Kernel metadata:
54      *                    - k            Number of matrix columns/rows depending on the type of reduction.
55      *                    - is_reshaped  True if the matrix has been reshaped.
56      *                    - scalar       Scalar value to multiply each reduced column/row by.
57      *                    - mul_byscalar True if each reduced column/row must be multiplied by a scalar value.
58      */
59     virtual void configure(const ICLTensor *input, ICLTensor *output, const GEMMLowpReductionKernelInfo &info) = 0;
60     /** Initialise the kernel's input and output.
61      *
62      * @param[in]  compile_context The compile context to be used.
63      * @param[in]  input           Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8.
64      * @param[out] output          Output row-vector of sums of all the entries in each row/col of input tensor. Data type supported: S32
65      * @param[in]  info            Kernel metadata:
66      *                             - k            Number of matrix columns/rows depending on the type of reduction.
67      *                             - is_reshaped  True if the matrix has been reshaped.
68      *                             - scalar       Scalar value to multiply each reduced column/row by.
69      *                             - mul_byscalar True if each reduced column/row must be multiplied by a scalar value.
70      */
71     virtual void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const GEMMLowpReductionKernelInfo &info) = 0;
72 
73 protected:
74     const ICLTensor *_input;
75     ICLTensor       *_output;
76 };
77 
78 /** OpenCL kernel used to compute the row-vectors of sums of all the entries in each row of Matrix A.
79  *
80  * @note This stage is needed to handle the offset of matrix product
81  *       https://github.com/google/gemmlowp/blob/master/doc/low-precision.md
82  */
83 class CLGEMMLowpMatrixAReductionKernel : public ICLGEMMLowpReductionKernel
84 {
85 public:
86     /** Initialise the kernel's input and output.
87      *
88      * @param[in]  mtx_a          Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8.
89      * @param[out] vector_sum_row Output row-vector of sums of all the entries in each row of mtx_a. Data type supported: S32
90      * @param[in]  info           Kernel metadata:
91      *                            - k            Number of matrix columns/rows depending on the type of reduction.
92      *                            - is_reshaped  True if the matrix has been reshaped.
93      *                            - scalar       Scalar value to multiply each reduced column/row by.
94      *                            - mul_byscalar True if each reduced column/row must be multiplied by a scalar value.
95      */
96     void configure(const ICLTensor *mtx_a, ICLTensor *vector_sum_row, const GEMMLowpReductionKernelInfo &info) override;
97     /** Initialise the kernel's input and output.
98      *
99      * @param[in]  compile_context The compile context to be used.
100      * @param[in]  mtx_a           Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8.
101      * @param[out] vector_sum_row  Output row-vector of sums of all the entries in each row of mtx_a. Data type supported: S32
102      * @param[in]  info            Kernel metadata:
103      *                             - k            Number of matrix columns/rows depending on the type of reduction.
104      *                             - is_reshaped  True if the matrix has been reshaped.
105      *                             - scalar       Scalar value to multiply each reduced column/row by.
106      *                             - mul_byscalar True if each reduced column/row must be multiplied by a scalar value.
107      */
108     void configure(const CLCompileContext &compile_context, const ICLTensor *mtx_a, ICLTensor *vector_sum_row, const GEMMLowpReductionKernelInfo &info) override;
109     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixAReductionKernel
110      *
111      * @param[in] mtx_a          Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8.
112      * @param[in] vector_sum_row Output row-vector of sums of all the entries in each row of mtx_a. Data type supported: S32
113      * @param[in] info           Kernel metadata:
114      *                           - k            Number of matrix columns/rows depending on the type of reduction.
115      *                           - is_reshaped  True if the matrix has been reshaped.
116      *                           - scalar       Scalar value to multiply each reduced column/row by.
117      *                           - mul_byscalar True if each reduced column/row must be multiplied by a scalar value.
118      *
119      * @return a status
120      */
121     static Status validate(const ITensorInfo *mtx_a, const ITensorInfo *vector_sum_row, const GEMMLowpReductionKernelInfo &info);
122 
123     // Inherited methods overridden:
124     void run(const Window &window, cl::CommandQueue &queue) override;
125 };
126 
127 /** OpenCL kernel used to compute the row-vectors of sums of all the entries in each column of Matrix B.
128  *
129  * @note This stage is needed to handle the offset of matrix product
130  *       https://github.com/google/gemmlowp/blob/master/doc/low-precision.md
131  */
132 class CLGEMMLowpMatrixBReductionKernel : public ICLGEMMLowpReductionKernel
133 {
134 public:
135     /** Initialise the kernel's input and output.
136      *
137      * @param[in]  mtx_b          Input tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL.
138      * @param[out] vector_sum_col Output row-vector of sums of all the entries in each column of mtx_b. Data type supported: S32
139      * @param[in]  info           Kernel metadata:
140      *                            - k            Number of matrix columns/rows depending on the type of reduction.
141      *                            - is_reshaped  True if the matrix has been reshaped.
142      *                            - scalar       Scalar value to multiply each reduced column/row by.
143      *                            - mul_byscalar True if each reduced column/row must be multiplied by a scalar value.
144      */
145     void configure(const ICLTensor *mtx_b, ICLTensor *vector_sum_col, const GEMMLowpReductionKernelInfo &info) override;
146     /** Initialise the kernel's input and output.
147      *
148      * @param[in]  compile_context The compile context to be used.
149      * @param[in]  mtx_b           Input tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL.
150      * @param[out] vector_sum_col  Output row-vector of sums of all the entries in each column of mtx_b. Data type supported: S32
151      * @param[in]  info            Kernel metadata:
152      *                             - k            Number of matrix columns/rows depending on the type of reduction.
153      *                             - is_reshaped  True if the matrix has been reshaped.
154      *                             - scalar       Scalar value to multiply each reduced column/row by.
155      *                             - mul_byscalar True if each reduced column/row must be multiplied by a scalar value.
156      */
157     void configure(const CLCompileContext &compile_context, const ICLTensor *mtx_b, ICLTensor *vector_sum_col, const GEMMLowpReductionKernelInfo &info) override;
158     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixBReductionKernel
159      *
160      * @param[in] mtx_b          Input tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL.
161      * @param[in] vector_sum_col Output row-vector of sums of all the entries in each column of mtx_b. Data type supported: S32
162      * @param[in] info           Kernel metadata:
163      *                           - k            Number of matrix columns/rows depending on the type of reduction.
164      *                           - is_reshaped  True if the matrix has been reshaped.
165      *                           - scalar       Scalar value to multiply each reduced column/row by.
166      *                           - mul_byscalar True if each reduced column/row must be multiplied by a scalar value.
167      *
168      * @return a status
169      */
170     static Status validate(const ITensorInfo *mtx_b, const ITensorInfo *vector_sum_col, const GEMMLowpReductionKernelInfo &info);
171 
172     // Inherited methods overridden:
173     void run(const Window &window, cl::CommandQueue &queue) override;
174 };
175 } // namespace arm_compute
176 #endif /* ARM_COMPUTE_CLGEMMLOWREDUCTIONKERNEL_H */
177