• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2018-2020 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef ARM_COMPUTE_CLGEMMMATRIXMULTIPLYRESHAPEDKERNEL_H
25 #define ARM_COMPUTE_CLGEMMMATRIXMULTIPLYRESHAPEDKERNEL_H
26 
27 #include "src/core/CL/ICLKernel.h"
28 
29 #include "arm_compute/core/KernelDescriptors.h"
30 
31 namespace arm_compute
32 {
33 class ICLTensor;
34 
35 /** OpenCL kernel to multiply matrices when both the input matrices LHS (input0) and RHS (input1) have been reshaped
36  *
37  * @note The input matrices @p input0 and @p input1 must be reshaped through @ref CLGEMMReshapeLHSMatrixKernel and  @ref CLGEMMReshapeRHSMatrixKernel
38  */
39 class CLGEMMMatrixMultiplyReshapedKernel : public ICLKernel
40 {
41 public:
42     /** Default Constructor */
43     CLGEMMMatrixMultiplyReshapedKernel();
44     /** Prevent instances of this class from being copied (As this class contains pointers) */
45     CLGEMMMatrixMultiplyReshapedKernel(const CLGEMMMatrixMultiplyReshapedKernel &) = delete;
46     /** Prevent instances of this class from being copied (As this class contains pointers) */
47     CLGEMMMatrixMultiplyReshapedKernel &operator=(const CLGEMMMatrixMultiplyReshapedKernel &) = delete;
48     /** Allow instances of this class to be moved */
49     CLGEMMMatrixMultiplyReshapedKernel(CLGEMMMatrixMultiplyReshapedKernel &&) = default;
50     /** Allow instances of this class to be moved */
51     CLGEMMMatrixMultiplyReshapedKernel &operator=(CLGEMMMatrixMultiplyReshapedKernel &&) = default;
52     /** Initialise the kernel's input and output.
53      *
54      * @note The F16 computation also supports mixed precision through the gemm_info.fp_mixed_precision flag.
55      *       Mixed precision combines different floating precisions during the computation, in particular, F32 for the accumulations and F16 for the
56      *       multiplications. i.e. float c = (half)a * (half)b
57      *
58      * @note If rhs_info.export_to_cl_image = true, this OpenCL kernel will fetch the RHS data using the OpenCL read_image built-in function.
59      *       Reading from the OpenCL image object can increase the performance. However, since the OpenCL image object is created importing the OpenCL buffer,
60      *       the following conditions are required:
61      *       -# rhs_info.n0 can only be 4, 8 and 16
62      *       -# rhs_info.k0 can only be 4, 8 and 16
63      *       -# Data type can only be F32
64      *       -# The platform should support the OpenCL cl_khr_image2d_from_buffer extension
65      *       -# The stride Y for the input1 should satisfy the OpenCL pitch alignment requirement
66      *       -# input1 width should be less or equal to (CL_DEVICE_IMAGE2D_MAX_WIDTH * 4)
67      *       -# input1 (height * depth) should be less or equal to CL_DEVICE_IMAGE2D_MAX_HEIGHT
68      *
69      * @param[in]  input0    Input tensor containing the LHS reshaped matrix. Data type supported: F16/F32 (only F32 if rhs_info.export_to_cl_image = true). The number of dimensions for the LHS matrix must be less or equal than 4
70      * @param[in]  input1    Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3
71      * @param[in]  input2    Input tensor containing the bias matrix. Data type supported: same as @p input0.
72      * @param[out] output    Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0
73      * @param[in]  alpha     Weight of the matrix product
74      * @param[in]  beta      Weight of the matrix bias
75      * @param[in]  lhs_info  LHS matrix information used for reshaping the input0 tensor. Only the following values are supported:
76      *                       lhs_info.m0: 2,3,4,5,6,7,8
77      *                       lhs_info.k0: 2,3,4,8,16
78      *                       lhs_info.transpose: false
79      * @param[in]  rhs_info  RHS matrix information used for reshaping the input1 tensor.  Only the following values are supported:
80      *                       rhs_info.n0: 2,3,4,8,16 (only 4, 8 and 16 if rhs_info.export_to_cl_image = true)
81      *                       rhs_info.k0: 2,3,4,8,16 (only 4, 8 and 16 if rhs_info.export_to_cl_image = true)
82      *                       rhs_info.transpose: true
83      * @param[in]  gemm_info GEMM information used to retrieve the original dimensions of the input matrices
84      *
85      * @note lhs_info.k0 must be equal to rhs_info.k0
86      */
87     void configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info,
88                    const GEMMRHSMatrixInfo &rhs_info,
89                    const GEMMKernelInfo    &gemm_info);
90     /** Initialise the kernel's input and output.
91      *
92      * @note The F16 computation also supports mixed precision through the gemm_info.fp_mixed_precision flag.
93      *       Mixed precision combines different floating precisions during the computation, in particular, F32 for the accumulations and F16 for the
94      *       multiplications. i.e. float c = (half)a * (half)b
95      *
96      * @note If rhs_info.export_to_cl_image = true, this OpenCL kernel will fetch the RHS data using the OpenCL read_image built-in function.
97      *       Reading from the OpenCL image object can increase the performance. However, since the OpenCL image object is created importing the OpenCL buffer,
98      *       the following conditions are required:
99      *       -# rhs_info.n0 can only be 4, 8 and 16
100      *       -# rhs_info.k0 can only be 4, 8 and 16
101      *       -# Data type can only be F32
102      *       -# The platform should support the OpenCL cl_khr_image2d_from_buffer extension
103      *       -# The stride Y for the input1 should satisfy the OpenCL pitch alignment requirement
104      *       -# input1 width should be less or equal to (CL_DEVICE_IMAGE2D_MAX_WIDTH * 4)
105      *       -# input1 (height * depth) should be less or equal to CL_DEVICE_IMAGE2D_MAX_HEIGHT
106      *
107      * @param[in]  compile_context The compile context to be used.
108      * @param[in]  input0          Input tensor containing the LHS reshaped matrix. Data type supported: F16/F32  (only F32 if rhs_info.export_to_cl_image = true). The number of dimensions for the LHS matrix must be less or equal than 4
109      * @param[in]  input1          Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3
110      * @param[in]  input2          Input tensor containing the bias matrix. Data type supported: same as @p input0.
111      * @param[out] output          Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0
112      * @param[in]  alpha           Weight of the matrix product
113      * @param[in]  beta            Weight of the matrix bias
114      * @param[in]  lhs_info        LHS matrix information used for reshaping the input0 tensor.  Only the following values are supported:
115      *                             lhs_info.m0: 2,3,4,5,6,7,8
116      *                             lhs_info.k0: 2,3,4,8,16
117      *                             lhs_info.transpose: false
118      * @param[in]  rhs_info        RHS matrix information used for reshaping the input1 tensor.  Only the following values are supported:
119      *                             rhs_info.n0: 2,3,4,8,16 (only 4, 8 and 16 if rhs_info.export_to_cl_image = true)
120      *                             rhs_info.k0: 2,3,4,8,16 (only 4, 8 and 16 if rhs_info.export_to_cl_image = true)
121      *                             rhs_info.transpose: true
122      * @param[in]  gemm_info       GEMM information used to retrieve the original dimensions of the input matrices
123      *
124      * @note lhs_info.k0 must be equal to rhs_info.k0
125      */
126     void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta,
127                    const GEMMLHSMatrixInfo &lhs_info,
128                    const GEMMRHSMatrixInfo &rhs_info,
129                    const GEMMKernelInfo    &gemm_info);
130     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixMultiplyReshapedKernel
131      *
132      * @note The F16 computation also supports mixed precision through the gemm_info.fp_mixed_precision flag.
133      *       Mixed precision combines different floating precisions during the computation, in particular, F32 for the accumulations and F16 for the
134      *       multiplications. i.e. float c = (half)a * (half)b
135      *
136      * @note If rhs_info.export_to_cl_image = true, this OpenCL kernel will fetch the RHS data using the OpenCL read_image built-in function.
137      *       Reading from the OpenCL image object can increase the performance. However, since the OpenCL image object is created importing the OpenCL buffer,
138      *       the following conditions are required:
139      *       -# rhs_info.n0 can only be 4, 8 and 16
140      *       -# rhs_info.k0 can only be 4, 8 and 16
141      *       -# Data type can only be F32
142      *       -# The platform should support the OpenCL cl_khr_image2d_from_buffer extension
143      *       -# The stride Y for the input1 should satisfy the OpenCL pitch alignment requirement
144      *       -# input1 width should be less or equal to (CL_DEVICE_IMAGE2D_MAX_WIDTH * 4)
145      *       -# input1 (height * depth) should be less or equal to CL_DEVICE_IMAGE2D_MAX_HEIGHT
146      *
147      * @param[in] input0    Input tensor containing the LHS reshaped matrix. Data type supported: F16/F32  (only F32 if rhs_info.export_to_cl_image = true). The number of dimensions for the LHS matrix must be less or equal than 4
148      * @param[in] input1    Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3
149      * @param[in] input2    Input tensor info containing the bias matrix. Data type supported: same as @p input0.
150      * @param[in] output    Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0
151      * @param[in] alpha     Weight of the matrix product
152      * @param[in] beta      Weight of the matrix bias
153      * @param[in] lhs_info  LHS matrix information used for reshaping the input0 tensor.  Only the following values are supported:
154      *                      lhs_info.m0: 2,3,4,5,6,7,8
155      *                      lhs_info.k0: 2,3,4,8,16
156      *                      lhs_info.transpose: false
157      * @param[in] rhs_info  RHS matrix information used for reshaping the input1 tensor.  Only the following values are supported:
158      *                      rhs_info.n0: 2,3,4,8,16 (only 4, 8 and 16 if rhs_info.export_to_cl_image = true)
159      *                      rhs_info.k0: 2,3,4,8,16 (only 4, 8 and 16 if rhs_info.export_to_cl_image = true)
160      *                      rhs_info.transpose: true
161      * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices
162      *
163      * @note lhs_info.k0 must be equal to rhs_info.k0
164      *
165      * @return a status
166      */
167     static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info,
168                            const GEMMRHSMatrixInfo &rhs_info,
169                            const GEMMKernelInfo    &gemm_info);
170 
171     // Inherited methods overridden:
172     void run(const Window &window, cl::CommandQueue &queue) override;
173 
174 private:
175     const ICLTensor *_input0;
176     const ICLTensor *_input1;
177     const ICLTensor *_input2;
178     ICLTensor       *_output;
179     bool             _slide_matrix_b;
180     bool             _reinterpret_output_as_3d;
181     bool             _use_dummy_work_items;
182     bool             _add_bias;
183     bool             _broadcast_bias;
184     bool             _export_to_cl_image;
185     unsigned int     _k;
186 };
187 } // namespace arm_compute
188 #endif /*ARM_COMPUTE_CLGEMMMATRIXMULTIPLYRESHAPEDKERNEL_H*/