/*
 * Copyright (c) 2017-2020 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef ARM_COMPUTE_CLGEMMLOWPOUTPUTSTAGE_H
#define ARM_COMPUTE_CLGEMMLOWPOUTPUTSTAGE_H

#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"

#include <limits>

/** This file contains all available output stages for GEMMLowp on OpenCL.
 *
 *  In gemmlowp, the "output stage" is the process that takes a final int32 accumulator value (the output of @ref CLGEMMLowpMatrixMultiplyCore),
 *  and processes it to obtain the final QASYMM8/QASYMM8_SIGNED value.
 *
 *  More information about the GEMMLowp output stage can be found at https://github.com/google/gemmlowp/blob/master/doc/output.md
 */

namespace arm_compute
{
class CLCompileContext;
class ITensor;
class ICLTensor;
class ITensorInfo;
struct GEMMLowpOutputStageInfo;

/** Basic function to execute CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint on OpenCL.
 *
 *  CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint depends on 3 parameters:
 *
 *  result_fixedpoint_multiplier, result_shift, result_offset_after_shift
 *
 * The final result is:
 *
 * (FixedPointMul(input[i][k], result_fixedpoint_multiplier) >> result_shift) + result_offset_after_shift
 *
 * where FixedPointMul(x, y) is the nearest integer to the following
 * mathematical expression, evaluated without overflow or intermediate rounding:
 *
 * (x * y) / 2^31
 *
 * For more information: https://github.com/google/gemmlowp/blob/master/public/output_stages.h#L68
 *
 * In case the bias tensor is provided, the final result is:
 *
 * ((FixedPointMul(input[i][k] + bias[k], result_fixedpoint_multiplier)) >> result_shift) + result_offset_after_shift
 *
 *  This function calls the following OpenCL kernels:
 *
 * -# @ref CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel
 *
 * @note The function also accepts two optional input arguments (min and max) which can be used to implement "rectified linear unit" activation functions
 *       after the result is shifted right by result_shift
 */
class CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint : public ICLSimpleFunction
{
public:
    /** Initialise the kernel's inputs and output
     *
     * @param[in]  input                        Input tensor. Data type supported: S32
     * @param[in]  bias                         Biases tensor. Only shared biases are supported; it can be nullptr if bias addition is not required.
     *                                          Biases are a 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[out] output                       Output tensor. Data type supported: QASYMM8
     * @param[in]  result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added
     * @param[in]  result_shift                 Number of bits to shift right the result after the fixed point multiplication
     * @param[in]  result_offset_after_shift    Offset to be applied to result before converting it back to QASYMM8
     * @param[in]  min                          (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
     * @param[in]  max                          (Optional) Max value used to saturate up the output result before converting back to QASYMM8.
     *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
     */
    void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift,
                   int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
    /** Initialise the kernel's inputs and output
     *
     * @param[in]  compile_context              The compile context to be used.
     * @param[in]  input                        Input tensor. Data type supported: S32
     * @param[in]  bias                         Biases tensor. Only shared biases are supported; it can be nullptr if bias addition is not required.
     *                                          Biases are a 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[out] output                       Output tensor. Data type supported: QASYMM8
     * @param[in]  result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added
     * @param[in]  result_shift                 Number of bits to shift right the result after the fixed point multiplication
     * @param[in]  result_offset_after_shift    Offset to be applied to result before converting it back to QASYMM8
     * @param[in]  min                          (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
     * @param[in]  max                          (Optional) Max value used to saturate up the output result before converting back to QASYMM8.
     *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
     */
    void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift,
                   int result_offset_after_shift,
                   int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
    /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint
     *
     * @param[in] input  Input tensor. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32
     * @param[in] bias   Biases tensor. Only shared biases are supported; it can be nullptr if bias addition is not required.
     *                   Biases are a 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[in] output Output tensor. Data type supported: QASYMM8
     * @param[in] min    (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
     * @param[in] max    (Optional) Max value used to saturate up the output result before converting back to QASYMM8.
     *                   Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
};
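
/* A minimal sketch of the per-element arithmetic described in the doc comment
 * above, assuming only the formulas given there. The helper names are made up
 * for illustration and are not library API; the sketch also ignores the
 * INT32_MIN * INT32_MIN saturation corner case that gemmlowp handles.
 *
 * @code
 * #include <algorithm>
 * #include <cstdint>
 *
 * // Nearest integer to (x * y) / 2^31, computed in 64 bits so the product
 * // cannot overflow; rounds halves away from zero, as gemmlowp does.
 * int32_t fixed_point_mul(int32_t x, int32_t y)
 * {
 *     const int64_t product = static_cast<int64_t>(x) * static_cast<int64_t>(y);
 *     const int64_t nudge   = (product >= 0) ? (int64_t(1) << 30) : (1 - (int64_t(1) << 30));
 *     return static_cast<int32_t>((product + nudge) >> 31);
 * }
 *
 * // (FixedPointMul(acc, multiplier) >> shift) + offset, clamped to [min, max]
 * // before the final conversion to the 8-bit output type.
 * int32_t quantize_down(int32_t acc, int32_t multiplier, int shift, int32_t offset, int32_t min, int32_t max)
 * {
 *     const int32_t shifted = (fixed_point_mul(acc, multiplier) >> shift) + offset;
 *     return std::max(min, std::min(max, shifted));
 * }
 * @endcode
 */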

/** Basic function to execute CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint on OpenCL.
 *
 *  CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint depends on 3 parameters:
 *
 *  result_fixedpoint_multiplier, result_shift, result_offset_after_shift
 *
 * The final result is:
 *
 * (FixedPointMul(input[i][k], result_fixedpoint_multiplier) >> result_shift) + result_offset_after_shift
 *
 * where FixedPointMul(x, y) is the nearest integer to the following
 * mathematical expression, evaluated without overflow or intermediate rounding:
 *
 * (x * y) / 2^31
 *
 * For more information: https://github.com/google/gemmlowp/blob/master/public/output_stages.h#L68
 *
 * In case the bias tensor is provided, the final result is:
 *
 * ((FixedPointMul(input[i][k] + bias[k], result_fixedpoint_multiplier)) >> result_shift) + result_offset_after_shift
 *
 *  This function calls the following OpenCL kernels:
 *
 * -# @ref CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel
 *
 * @note The function also accepts two optional input arguments (min and max) which can be used to implement "rectified linear unit" activation functions
 *       after the result is shifted right by result_shift
 */
class CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint : public ICLSimpleFunction
{
public:
    /** Initialise the kernel's inputs and output
     *
     * @param[in]  input                        Input tensor. Data type supported: S32
     * @param[in]  bias                         Biases tensor. Only shared biases are supported; it can be nullptr if bias addition is not required.
     *                                          Biases are a 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[out] output                       Output tensor. Data type supported: QASYMM8_SIGNED
     * @param[in]  result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added
     * @param[in]  result_shift                 Number of bits to shift right the result after the fixed point multiplication
     * @param[in]  result_offset_after_shift    Offset to be applied to result before converting it back to QASYMM8_SIGNED
     * @param[in]  min                          (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED. Defaults to the minimum possible 32-bit signed integer.
     * @param[in]  max                          (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED.
     *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
     */
    void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift,
                   int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
    /** Initialise the kernel's inputs and output
     *
     * @param[in]  compile_context              The compile context to be used.
     * @param[in]  input                        Input tensor. Data type supported: S32
     * @param[in]  bias                         Biases tensor. Only shared biases are supported; it can be nullptr if bias addition is not required.
     *                                          Biases are a 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[out] output                       Output tensor. Data type supported: QASYMM8_SIGNED
     * @param[in]  result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added
     * @param[in]  result_shift                 Number of bits to shift right the result after the fixed point multiplication
     * @param[in]  result_offset_after_shift    Offset to be applied to result before converting it back to QASYMM8_SIGNED
     * @param[in]  min                          (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED. Defaults to the minimum possible 32-bit signed integer.
     * @param[in]  max                          (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED.
     *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
     */
    void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift,
                   int result_offset_after_shift,
                   int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
    /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint
     *
     * @param[in] input  Input tensor. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32
     * @param[in] bias   Biases tensor. Only shared biases are supported; it can be nullptr if bias addition is not required.
     *                   Biases are a 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[in] output Output tensor. Data type supported: QASYMM8_SIGNED
     * @param[in] min    (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED. Defaults to the minimum possible 32-bit signed integer.
     * @param[in] max    (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED.
     *                   Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
};
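
/* A usage sketch for the fixed-point output stage functions above, assuming an
 * initialised CL backend; the tensor shapes and the multiplier/shift/offset
 * values are made-up illustrations, not values prescribed by the library:
 *
 * @code
 * // Requires "arm_compute/runtime/CL/CLTensor.h" and an initialised
 * // CLScheduler, e.g. CLScheduler::get().default_init();
 * CLTensor input, bias, output;
 * input.allocator()->init(TensorInfo(TensorShape(32U, 16U), 1, DataType::S32));
 * bias.allocator()->init(TensorInfo(TensorShape(32U), 1, DataType::S32));
 * output.allocator()->init(TensorInfo(TensorShape(32U, 16U), 1, DataType::QASYMM8));
 *
 * CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint output_stage;
 * output_stage.configure(&input, &bias, &output,
 *                        1073741824, // result_fixedpoint_multiplier: ~0.5 in Q0.31 (made-up)
 *                        1,          // result_shift (made-up)
 *                        10,         // result_offset_after_shift (made-up)
 *                        0, 255);    // min/max: clamp to the full QASYMM8 range
 *
 * input.allocator()->allocate();
 * bias.allocator()->allocate();
 * output.allocator()->allocate();
 * // ... fill input and bias, then:
 * output_stage.run();
 * @endcode
 */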

/** Basic function to execute CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint on OpenCL.
 *
 *  CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint depends on 2 parameters:
 *
 *  result_fixedpoint_multiplier, result_shift
 *
 * The final result is:
 *
 * (FixedPointMul(input[i][k], result_fixedpoint_multiplier) >> result_shift)
 *
 * where FixedPointMul(x, y) is the nearest integer to the following
 * mathematical expression, evaluated without overflow or intermediate rounding:
 *
 * (x * y) / 2^31
 *
 * For more information: https://github.com/google/gemmlowp/blob/master/public/output_stages.h#L68
 *
 * In case the bias tensor is provided, the final result is:
 *
 * ((FixedPointMul(input[i][k] + bias[k], result_fixedpoint_multiplier)) >> result_shift)
 *
 *  This function calls the following OpenCL kernels:
 *
 * -# @ref CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel
 *
 * @note The function also accepts two optional input arguments (min and max) which can be used to implement "rectified linear unit" activation functions
 *       after the result is shifted right by result_shift
 */
class CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint : public ICLSimpleFunction
{
public:
    /** Initialise the kernel's inputs and output
     *
     * @param[in]  input                        Input tensor. Data type supported: S32
     * @param[in]  bias                         Biases tensor. Only shared biases are supported; it can be nullptr if bias addition is not required.
     *                                          Biases are a 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[out] output                       Output tensor. Data type supported: QSYMM16
     * @param[in]  result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added
     * @param[in]  result_shift                 Number of bits to shift right the result after the fixed point multiplication
     * @param[in]  min                          (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to the minimum possible 32-bit signed integer.
     * @param[in]  max                          (Optional) Max value used to saturate up the output result before converting back to QSYMM16.
     *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
     */
    void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int min = std::numeric_limits<int32_t>::lowest(),
                   int max = std::numeric_limits<int32_t>::max());
    /** Initialise the kernel's inputs and output
     *
     * @param[in]  compile_context              The compile context to be used.
     * @param[in]  input                        Input tensor. Data type supported: S32
     * @param[in]  bias                         Biases tensor. Only shared biases are supported; it can be nullptr if bias addition is not required.
     *                                          Biases are a 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[out] output                       Output tensor. Data type supported: QSYMM16
     * @param[in]  result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added
     * @param[in]  result_shift                 Number of bits to shift right the result after the fixed point multiplication
     * @param[in]  min                          (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to the minimum possible 32-bit signed integer.
     * @param[in]  max                          (Optional) Max value used to saturate up the output result before converting back to QSYMM16.
     *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
     */
    void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift,
                   int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
    /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint
     *
     * @param[in] input  Input tensor info. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32
     * @param[in] bias   Biases tensor info. Only shared biases are supported; it can be nullptr if bias addition is not required.
     *                   Biases are a 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[in] output Output tensor info. Data type supported: QSYMM16
     * @param[in] min    (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to the minimum possible 32-bit signed integer.
     * @param[in] max    (Optional) Max value used to saturate up the output result before converting back to QSYMM16.
     *                   Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
};
/** Basic function to execute GEMMLowpQuantizeDown kernels on CL.
 *
 *  This function calls the following CL kernels:
 *
 * -# @ref CLGEMMLowpQuantizeDownInt32ScaleKernel
 * -# @ref CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel
 * -# @ref CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel
 */
class CLGEMMLowpOutputStage : public ICLSimpleFunction
{
public:
    /** Initialise the kernel's inputs and output
     *
     * @param[in]  input  Input tensor. Data type supported: S32
     * @param[in]  bias   Biases tensor. Only shared biases are supported; it can be nullptr if bias addition is not required.
     *                    Biases are a 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[out] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED
     * @param[in]  info   GEMMLowp output stage metadata.
     */
    void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo &info);
    /** Initialise the kernel's inputs and output
     *
     * @param[in]  compile_context The compile context to be used.
     * @param[in]  input           Input tensor. Data type supported: S32
     * @param[in]  bias            Biases tensor. Only shared biases are supported; it can be nullptr if bias addition is not required.
     *                             Biases are a 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[out] output          Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED
     * @param[in]  info            GEMMLowp output stage metadata.
     */
    void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo &info);
    /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpOutputStage
     *
     * @param[in] input  Input tensor. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32
     * @param[in] bias   Biases tensor. Only shared biases are supported; it can be nullptr if bias addition is not required.
     *                   Biases are a 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[in] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED
     * @param[in] info   GEMMLowp output stage metadata.
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo &info);
};
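
/* A usage sketch for the info-driven variant, assuming tensors set up as in the
 * earlier sketch. The GEMMLowpOutputStageInfo field names below live in
 * arm_compute/core/Types.h and should be treated as an assumption here, as
 * should the quantization values, which are made up:
 *
 * @code
 * // input, bias and output initialised as in the previous sketch.
 * GEMMLowpOutputStageInfo info{};
 * info.type                = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
 * info.gemmlowp_multiplier = 1073741824; // made-up Q0.31 multiplier
 * info.gemmlowp_shift      = 1;          // made-up shift
 * info.gemmlowp_offset     = 10;         // made-up offset after shift
 * info.gemmlowp_min_bound  = 0;
 * info.gemmlowp_max_bound  = 255;
 * info.output_data_type    = DataType::QASYMM8;
 *
 * CLGEMMLowpOutputStage output_stage;
 * // Check the configuration before committing to it.
 * ARM_COMPUTE_ERROR_THROW_ON(CLGEMMLowpOutputStage::validate(input.info(), bias.info(), output.info(), info));
 * output_stage.configure(&input, &bias, &output, info);
 * output_stage.run();
 * @endcode
 */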
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLGEMMLOWPOUTPUTSTAGE_H */