• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2019-2020 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef ARM_COMPUTE_CLGEMMMATRIXMULTIPLYRESHAPEDONLYRHSKERNEL_H
25 #define ARM_COMPUTE_CLGEMMMATRIXMULTIPLYRESHAPEDONLYRHSKERNEL_H
26 
27 #include "src/core/CL/ICLKernel.h"
28 
29 #include "arm_compute/core/KernelDescriptors.h"
30 
31 namespace arm_compute
32 {
33 class ICLTensor;
34 
35 /** OpenCL kernel to multiply matrices when only the RHS input matrix (input1) has been reshaped.
36  *
37  * @note The input matrix input1 must be reshaped through @ref CLGEMMReshapeRHSMatrixKernel before it is passed to this kernel.
38  */
39 class CLGEMMMatrixMultiplyReshapedOnlyRHSKernel : public ICLKernel
40 {
41 public:
42     /** Default constructor */
43     CLGEMMMatrixMultiplyReshapedOnlyRHSKernel();
44     /** Copy construction is disabled (this class contains raw pointers) */
45     CLGEMMMatrixMultiplyReshapedOnlyRHSKernel(const CLGEMMMatrixMultiplyReshapedOnlyRHSKernel &) = delete;
46     /** Copy assignment is disabled (this class contains raw pointers) */
47     CLGEMMMatrixMultiplyReshapedOnlyRHSKernel &operator=(const CLGEMMMatrixMultiplyReshapedOnlyRHSKernel &) = delete;
48     /** Allow instances of this class to be move-constructed */
49     CLGEMMMatrixMultiplyReshapedOnlyRHSKernel(CLGEMMMatrixMultiplyReshapedOnlyRHSKernel &&) = default;
50     /** Allow instances of this class to be move-assigned */
51     CLGEMMMatrixMultiplyReshapedOnlyRHSKernel &operator=(CLGEMMMatrixMultiplyReshapedOnlyRHSKernel &&) = default;
52     /** Initialise the kernel's inputs and output (overload that takes no explicit compile context).
53      *
54      * @note If rhs_info.export_to_cl_image = true, this OpenCL kernel will fetch the RHS data using the OpenCL read_image built-in function.
55      *       Reading from the OpenCL image object can increase the performance. However, since the OpenCL image object is created by importing the OpenCL buffer,
56      *       the following conditions are required:
57      *       -# rhs_info.n0 can only be 4, 8 or 16
58      *       -# rhs_info.k0 can only be 4, 8 or 16
59      *       -# Data type can only be F32
60      *       -# The platform should support the OpenCL cl_khr_image2d_from_buffer extension
61      *       -# The stride Y for the input1 should satisfy the OpenCL pitch alignment requirement
62      *       -# input1 width should be less than or equal to (CL_DEVICE_IMAGE2D_MAX_WIDTH * 4)
63      *       -# input1 (height * depth) should be less than or equal to CL_DEVICE_IMAGE2D_MAX_HEIGHT
64      *
65      * @param[in]  input0    Input tensor containing the LHS matrix. Data type supported: F16/F32 (only F32 if rhs_info.export_to_cl_image = true).
66      *                       The number of dimensions for the LHS matrix must be less than or equal to 4.
67      * @param[in]  input1    Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less than or equal to 3.
68      * @param[in]  input2    Input tensor containing the bias matrix. Data type supported: same as @p input0.
69      * @param[out] output    Output tensor to store the result of the matrix multiplication. Data type supported: same as @p input0
70      * @param[in]  alpha     Weight of the matrix product
71      * @param[in]  beta      Weight of the matrix bias
72      * @param[in]  lhs_info  LHS matrix information used to retrieve the number of rows to be processed by each thread. Only the following values are supported:
73      *                       lhs_info.m0: 1,2,3,4,5,6,7,8
74      * @param[in]  rhs_info  RHS matrix information used for reshaping the input1 tensor. Only the following values are supported:
75      *                       rhs_info.k0: 2,3,4,8,16
76      *                       rhs_info.n0: 2,3,4,8,16
77      *                       rhs_info.transpose: true,false
78      * @param[in]  gemm_info GEMM information used to retrieve the original dimensions of the input matrices
79      */
80     void configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info,
81                    const GEMMRHSMatrixInfo &rhs_info,
82                    const GEMMKernelInfo    &gemm_info);
83     /** Initialise the kernel's inputs and output using an explicitly supplied compile context.
84      *
85      * @note If rhs_info.export_to_cl_image = true, this OpenCL kernel will fetch the RHS data using the OpenCL read_image built-in function.
86      *       Reading from the OpenCL image object can increase the performance. However, since the OpenCL image object is created by importing the OpenCL buffer,
87      *       the following conditions are required:
88      *       -# rhs_info.n0 can only be 4, 8 or 16
89      *       -# rhs_info.k0 can only be 4, 8 or 16
90      *       -# Data type can only be F32
91      *       -# The platform should support the OpenCL cl_khr_image2d_from_buffer extension
92      *       -# The stride Y for the input1 should satisfy the OpenCL pitch alignment requirement
93      *       -# input1 width should be less than or equal to (CL_DEVICE_IMAGE2D_MAX_WIDTH * 4)
94      *       -# input1 (height * depth) should be less than or equal to CL_DEVICE_IMAGE2D_MAX_HEIGHT
95      *
96      * @param[in]  compile_context The compile context to be used.
97      * @param[in]  input0          Input tensor containing the LHS matrix. Data type supported: F16/F32 (only F32 if rhs_info.export_to_cl_image = true).
98      *                             The number of dimensions for the LHS matrix must be less than or equal to 4.
99      * @param[in]  input1          Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less than or equal to 3.
100      * @param[in]  input2          Input tensor containing the bias matrix. Data type supported: same as @p input0.
101      * @param[out] output          Output tensor to store the result of the matrix multiplication. Data type supported: same as @p input0
102      * @param[in]  alpha           Weight of the matrix product
103      * @param[in]  beta            Weight of the matrix bias
104      * @param[in]  lhs_info        LHS matrix information used to retrieve the number of rows to be processed by each thread. Only the following values are supported:
105      *                             lhs_info.m0: 1,2,3,4,5,6,7,8
106      * @param[in]  rhs_info        RHS matrix information used for reshaping the input1 tensor. Only the following values are supported:
107      *                             rhs_info.k0: 2,3,4,8,16
108      *                             rhs_info.n0: 2,3,4,8,16
109      *                             rhs_info.transpose: true,false
110      * @param[in]  gemm_info       GEMM information used to retrieve the original dimensions of the input matrices
111      */
112     void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta,
113                    const GEMMLHSMatrixInfo &lhs_info,
114                    const GEMMRHSMatrixInfo &rhs_info,
115                    const GEMMKernelInfo    &gemm_info);
116     /** Static function to check if the given info will lead to a valid configuration of @ref CLGEMMMatrixMultiplyReshapedOnlyRHSKernel
117      *
118      * @note If rhs_info.export_to_cl_image = true, this OpenCL kernel will fetch the RHS data using the OpenCL read_image built-in function.
119      *       Reading from the OpenCL image object can increase the performance. However, since the OpenCL image object is created by importing the OpenCL buffer,
120      *       the following conditions are required:
121      *       -# rhs_info.n0 can only be 4, 8 or 16
122      *       -# rhs_info.k0 can only be 4, 8 or 16
123      *       -# Data type can only be F32
124      *       -# The platform should support the OpenCL cl_khr_image2d_from_buffer extension
125      *       -# The stride Y for the input1 should satisfy the OpenCL pitch alignment requirement
126      *       -# input1 width should be less than or equal to (CL_DEVICE_IMAGE2D_MAX_WIDTH * 4)
127      *       -# input1 (height * depth) should be less than or equal to CL_DEVICE_IMAGE2D_MAX_HEIGHT
128      *
129      * @param[in] input0    Input tensor info for the LHS matrix. Data type supported: F16/F32 (only F32 if rhs_info.export_to_cl_image = true).
130      *                      The number of dimensions for the LHS matrix must be less than or equal to 4.
131      * @param[in] input1    Input tensor info for the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less than or equal to 3.
132      * @param[in] input2    Input tensor info for the bias matrix. Data type supported: same as @p input0.
133      * @param[in] output    Output tensor info. Data type supported: same as @p input0
134      * @param[in] alpha     Weight of the matrix product
135      * @param[in] beta      Weight of the matrix bias
136      * @param[in] lhs_info  LHS matrix information used to retrieve the number of rows to be processed by each thread. Only the following values are supported:
137      *                      lhs_info.m0: 1,2,3,4,5,6,7,8
138      * @param[in] rhs_info  RHS matrix information used for reshaping the input1 tensor. Only the following values are supported:
139      *                      rhs_info.k0: 2,3,4,8,16
140      *                      rhs_info.n0: 2,3,4,8,16
141      *                      rhs_info.transpose: true,false
142      * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices
143      *
144      * @return a status describing whether the configuration is valid
145      */
146     static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info,
147                            const GEMMRHSMatrixInfo &rhs_info,
148                            const GEMMKernelInfo    &gemm_info);
149 
150     // Inherited methods overridden (run() overrides a virtual declared in a base class, per the override specifier):
151     void run(const Window &window, cl::CommandQueue &queue) override;
152 
153 private:
154     const ICLTensor *_input0; /**< Raw tensor pointer, not released by this class (no destructor is declared here); presumably the LHS matrix given to configure() - TODO confirm in the .cpp */
155     const ICLTensor *_input1; /**< Raw tensor pointer, not released by this class; presumably the reshaped RHS matrix given to configure() - TODO confirm */
156     const ICLTensor *_input2; /**< Raw tensor pointer, not released by this class; presumably the bias matrix given to configure() - TODO confirm */
157     ICLTensor       *_output; /**< Raw tensor pointer, not released by this class; presumably the output given to configure() - TODO confirm */
158     bool             _slide_matrix_b; /**< NOTE(review): presumably whether the RHS matrix is advanced across batches when running - confirm in the .cpp */
159     bool             _reinterpret_input_as_3d; /**< NOTE(review): presumably derived from gemm_info (input treated as a 3D tensor) - confirm */
160     bool             _reinterpret_output_as_3d; /**< NOTE(review): presumably derived from gemm_info (output treated as a 3D tensor) - confirm */
161     bool             _use_dummy_work_items; /**< NOTE(review): presumably a device-dependent dispatch setting - confirm */
162     bool             _add_bias; /**< NOTE(review): presumably true when a bias matrix (input2) is to be added - confirm */
163     bool             _broadcast_bias; /**< NOTE(review): presumably derived from gemm_info (bias broadcast across the output) - confirm */
164     bool             _export_to_cl_image; /**< NOTE(review): presumably mirrors rhs_info.export_to_cl_image (see the notes on configure()) - confirm */
165     bool             _has_pad_y; /**< NOTE(review): presumably whether the tensors carry padding along Y - confirm */
166 };
167 } // namespace arm_compute
168 #endif /*ARM_COMPUTE_CLGEMMMATRIXMULTIPLYRESHAPEDONLYRHSKERNEL_H*/
169