/*
 * Copyright (c) 2017-2020 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef ARM_COMPUTE_NEGEMMLOWREDUCTIONKERNEL_H
#define ARM_COMPUTE_NEGEMMLOWREDUCTIONKERNEL_H

#include "src/core/NEON/INEKernel.h"

namespace arm_compute
{
// Forward declarations
class ITensor;
struct GEMMLowpReductionKernelInfo;

/** Common interface for all NEON reduction kernels */
class INEGEMMLowpReductionKernel : public INEKernel
{
public:
    /** Constructor */
    INEGEMMLowpReductionKernel();
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    INEGEMMLowpReductionKernel(const INEGEMMLowpReductionKernel &) = delete;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    INEGEMMLowpReductionKernel &operator=(const INEGEMMLowpReductionKernel &) = delete;
    /** Allow instances of this class to be moved */
    INEGEMMLowpReductionKernel(INEGEMMLowpReductionKernel &&) = default;
    /** Allow instances of this class to be moved */
    INEGEMMLowpReductionKernel &operator=(INEGEMMLowpReductionKernel &&) = default;
    /** Default destructor */
    virtual ~INEGEMMLowpReductionKernel() = default;

    /** Initialise the kernel's input and output.
     *
     * @param[in]  input  Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL
     * @param[out] output Output row-vector of sums of all the entries in each row/col of the input tensor. Data type supported: S32
     * @param[in]  info   Kernel metadata:
     *                    - k             Number of matrix columns/rows, depending on the type of reduction.
     *                    - is_reshaped   True if the matrix has been reshaped.
     *                    - scalar        Scalar value to multiply each reduced column/row by.
     *                    - mul_by_scalar True if each reduced column/row must be multiplied by a scalar value.
     */
    virtual void configure(const ITensor *input, ITensor *output, const GEMMLowpReductionKernelInfo &info) = 0;

protected:
    const ITensor *_input;
    ITensor       *_output;
    int32_t        _k;
    int32_t        _scalar;
    bool           _mul_by_scalar;
};

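// Example (a minimal sketch, not part of this header): populating the kernel metadata before calling
// configure(). It assumes GEMMLowpReductionKernelInfo exposes the four documented fields (k,
// is_reshaped, scalar, mul_by_scalar) as public members; check arm_compute/core/KernelDescriptors.h
// for the authoritative definition. The variable names (num_cols, offset, kernel, input, output) are
// placeholders.
//
//     GEMMLowpReductionKernelInfo info{};
//     info.k             = num_cols;       // reduction length: columns for an A reduction, rows for a B reduction
//     info.is_reshaped   = false;          // input has not been interleaved/transposed
//     info.scalar        = offset;         // multiplier applied to each sum when mul_by_scalar is true
//     info.mul_by_scalar = (offset != 0);
//     kernel.configure(&input, &output, info);
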
/** NEON kernel used to compute the row-vector of sums of all the entries in each row of Matrix A.
 *
 * @note This stage is needed to handle the offset of the matrix product
 *       https://github.com/google/gemmlowp/blob/master/doc/low-precision.md
 */
class NEGEMMLowpMatrixAReductionKernel : public INEGEMMLowpReductionKernel
{
public:
    const char *name() const override
    {
        return "NEGEMMLowpMatrixAReductionKernel";
    }
    /** Default constructor */
    NEGEMMLowpMatrixAReductionKernel() = default;
    /** Prevent instances of this class from being copied */
    NEGEMMLowpMatrixAReductionKernel(const NEGEMMLowpMatrixAReductionKernel &) = delete;
    /** Prevent instances of this class from being copied */
    NEGEMMLowpMatrixAReductionKernel &operator=(const NEGEMMLowpMatrixAReductionKernel &) = delete;
    /** Allow instances of this class to be moved */
    NEGEMMLowpMatrixAReductionKernel(NEGEMMLowpMatrixAReductionKernel &&) = default;
    /** Allow instances of this class to be moved */
    NEGEMMLowpMatrixAReductionKernel &operator=(NEGEMMLowpMatrixAReductionKernel &&) = default;
    /** Default destructor */
    ~NEGEMMLowpMatrixAReductionKernel() = default;
    /** Initialise the kernel's input and output.
     *
     * @param[in]  mtx_a          Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL
     * @param[out] vector_sum_row Output row-vector of sums of all the entries in each row of mtx_a. Data type supported: S32
     * @param[in]  info           Kernel metadata:
     *                            - k             (num_mtx_a_cols) Number of matrix A columns.
     *                            - is_reshaped   (is_interleaved4x4) True if matrix A has been interleaved 4x4.
     *                            - scalar        Scalar value to multiply each reduced row by.
     *                            - mul_by_scalar True if each reduced row must be multiplied by a scalar value.
     */
    void configure(const ITensor *mtx_a, ITensor *vector_sum_row, const GEMMLowpReductionKernelInfo &info) override;
    /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpMatrixAReductionKernel
     *
     * @param[in] mtx_a          Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL
     * @param[in] vector_sum_row Output row-vector of sums of all the entries in each row of mtx_a. Data type supported: S32
     * @param[in] info           Kernel metadata:
     *                           - k             (num_mtx_a_cols) Number of matrix A columns.
     *                           - is_reshaped   (is_interleaved4x4) True if matrix A has been interleaved 4x4.
     *                           - scalar        Scalar value to multiply each reduced row by.
     *                           - mul_by_scalar True if each reduced row must be multiplied by a scalar value.
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *mtx_a, const ITensorInfo *vector_sum_row, const GEMMLowpReductionKernelInfo &info);

    // Inherited methods overridden:
    void run(const Window &window, const ThreadInfo &info) override;

private:
    /** Execution of the reduction kernel specialized on the input type
     *
     * @param[in] window Execution window
     */
    template <typename T>
    void run_internal(const Window &window);
};
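
// Usage sketch for the Matrix A reduction (a hedged example, not taken from this header). It assumes
// an allocated QASYMM8 tensor mtx_a of shape [K, M], an S32 tensor vector_sum_row of shape [M], a
// (k, is_reshaped, scalar, mul_by_scalar) constructor on GEMMLowpReductionKernelInfo, and dispatch
// through NEScheduler; the split dimension used for scheduling is also an assumption.
//
//     NEGEMMLowpMatrixAReductionKernel a_reduction;
//     GEMMLowpReductionKernelInfo      info(K /* k */, false /* is_reshaped */, 0 /* scalar */, false /* mul_by_scalar */);
//     ARM_COMPUTE_ERROR_THROW_ON(NEGEMMLowpMatrixAReductionKernel::validate(mtx_a.info(), vector_sum_row.info(), info));
//     a_reduction.configure(&mtx_a, &vector_sum_row, info);
//     NEScheduler::get().schedule(&a_reduction, Window::DimX);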

/** NEON kernel used to compute the row-vector of sums of all the entries in each column of Matrix B.
 *
 * @note This stage is needed to handle the offset of the matrix product
 *       https://github.com/google/gemmlowp/blob/master/doc/low-precision.md
 */
class NEGEMMLowpMatrixBReductionKernel : public INEGEMMLowpReductionKernel
{
public:
    const char *name() const override
    {
        return "NEGEMMLowpMatrixBReductionKernel";
    }
    /** Default constructor */
    NEGEMMLowpMatrixBReductionKernel() = default;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEGEMMLowpMatrixBReductionKernel(const NEGEMMLowpMatrixBReductionKernel &) = delete;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEGEMMLowpMatrixBReductionKernel &operator=(const NEGEMMLowpMatrixBReductionKernel &) = delete;
    /** Allow instances of this class to be moved */
    NEGEMMLowpMatrixBReductionKernel(NEGEMMLowpMatrixBReductionKernel &&) = default;
    /** Allow instances of this class to be moved */
    NEGEMMLowpMatrixBReductionKernel &operator=(NEGEMMLowpMatrixBReductionKernel &&) = default;
    /** Default destructor */
    ~NEGEMMLowpMatrixBReductionKernel() = default;
    /** Initialise the kernel's input and output.
     *
     * @param[in]  mtx_b          Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL
     * @param[out] vector_sum_col Output row-vector of sums of all the entries in each column of mtx_b. Data type supported: S32
     * @param[in]  info           Kernel metadata:
     *                            - k             (num_mtx_b_rows) Number of matrix B rows.
     *                            - is_reshaped   (is_transposed1xW) True if the input tensor has been transposed 1xW.
     *                            - scalar        Scalar value to multiply each reduced column by.
     *                            - mul_by_scalar True if each reduced column must be multiplied by a scalar value.
     */
    void configure(const ITensor *mtx_b, ITensor *vector_sum_col, const GEMMLowpReductionKernelInfo &info) override;
    /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpMatrixBReductionKernel
     *
     * @param[in] mtx_b          Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL
     * @param[in] vector_sum_col Output row-vector of sums of all the entries in each column of mtx_b. Data type supported: S32
     * @param[in] info           Kernel metadata:
     *                           - k             (num_mtx_b_rows) Number of matrix B rows.
     *                           - is_reshaped   (is_transposed1xW) True if the input tensor has been transposed 1xW.
     *                           - scalar        Scalar value to multiply each reduced column by.
     *                           - mul_by_scalar True if each reduced column must be multiplied by a scalar value.
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *mtx_b, const ITensorInfo *vector_sum_col, const GEMMLowpReductionKernelInfo &info);

    // Inherited methods overridden:
    void run(const Window &window, const ThreadInfo &info) override;

private:
    /** Execution of the reduction kernel specialized on the input type
     *
     * @param[in] window Execution window
     * @param[in] info   Thread-related information
     */
    template <typename T>
    void run_internal(const Window &window, const ThreadInfo &info);
};
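
// The Matrix B reduction follows the same pattern (a hedged sketch with assumed names: a QASYMM8
// tensor mtx_b of shape [N, K] and an S32 tensor vector_sum_col of shape [N]). Note that k is the
// number of B rows here; scheduling would be done exactly as in the A-reduction example above.
//
//     NEGEMMLowpMatrixBReductionKernel b_reduction;
//     GEMMLowpReductionKernelInfo      info(K /* k = num_mtx_b_rows */, false, 0, false);
//     ARM_COMPUTE_ERROR_THROW_ON(NEGEMMLowpMatrixBReductionKernel::validate(mtx_b.info(), vector_sum_col.info(), info));
//     b_reduction.configure(&mtx_b, &vector_sum_col, info);
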
} // namespace arm_compute

#endif /* ARM_COMPUTE_NEGEMMLOWREDUCTIONKERNEL_H */