• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2017-2020 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef ARM_COMPUTE_CLGEMMMATRIXMULTIPLYKERNEL_H
25 #define ARM_COMPUTE_CLGEMMMATRIXMULTIPLYKERNEL_H
26 
27 #include "src/core/CL/ICLKernel.h"
28 
29 namespace arm_compute
30 {
31 class ICLTensor;
32 
33 /** OpenCL kernel to multiply two input matrices "A" and "B" and add a matrix "C" if provided. All elements of the output matrix will be multiplied by alpha. In case matrix C is passed, it will be added to the previous result.
34  *  For the matrix C, the broadcast addition is supported if the flag "broadcast_bias" is set in the GEMMReshapeInfo object
35  *
36  * @note If the input tensors @p input0 and @p input1 have been reshaped respectively with @ref CLGEMMReshapeLHSMatrixKernel and @ref CLGEMMReshapeRHSMatrixKernel,
37  *       the flag @p is_interleaved_transposed must be set to true
38  *
39  * @attention @p input1 tensor must have at least 2 dimensions (matrix)
40  *
41  */
42 class CLGEMMMatrixMultiplyKernel : public ICLKernel
43 {
44 public:
45     /** Default constructor */
46     CLGEMMMatrixMultiplyKernel();
47     /** Prevent instances of this class from being copied (As this class contains pointers) */
48     CLGEMMMatrixMultiplyKernel(const CLGEMMMatrixMultiplyKernel &) = delete;
49     /** Prevent instances of this class from being copied (As this class contains pointers) */
50     CLGEMMMatrixMultiplyKernel &operator=(const CLGEMMMatrixMultiplyKernel &) = delete;
51     /** Allow instances of this class to be moved */
52     CLGEMMMatrixMultiplyKernel(CLGEMMMatrixMultiplyKernel &&) = default;
53     /** Allow instances of this class to be moved */
54     CLGEMMMatrixMultiplyKernel &operator=(CLGEMMMatrixMultiplyKernel &&) = default;
55     /** Initialise the kernel's inputs, output, alpha and beta (overload that takes no compile context)
56      *
57      * @param[in]  input0                    Input tensor containing the Matrix A. Data types supported: F16/F32
58      * @param[in]  input1                    Input tensor containing the Matrix B. Data type supported: same as @p input0
59      * @param[in]  input2                    Input tensor containing the Matrix C (bias). Can be nullptr. Data type supported: same as @p input0
60      * @param[out] output                    Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0
61      * @param[in]  alpha                     Weight of the matrix product
62      * @param[in]  beta                      (Optional) Weight of matrix C (bias). Default value is 0. Only beta = 1 is currently supported. NOTE(review): the default (0) differs from the only value documented as supported (1) - confirm how beta = 0 is handled by the implementation.
63      * @param[in]  is_interleaved_transposed (Optional) True if input0 and input1 have been reshaped respectively using @ref CLGEMMReshapeLHSMatrixKernel and @ref CLGEMMReshapeRHSMatrixKernel. Default value is true.
64      * @param[in]  reshape_info              (Optional) GEMM reshape info. If is_interleaved_transposed = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped
65      * @param[in]  fp_mixed_precision        (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy. Default value is false.
66      * @param[in]  activation_info           (Optional) Activation to apply after the matrix multiplication
67      *
68      */
69     void configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta = 0.f,
70                    bool is_interleaved_transposed = true, const GEMMReshapeInfo &reshape_info = GEMMReshapeInfo(), bool fp_mixed_precision = false, const ActivationLayerInfo &activation_info = ActivationLayerInfo());
71     /** Initialise the kernel's inputs, output, alpha and beta using an explicitly provided compile context
72      *
73      * @param[in]  compile_context           The compile context to be used.
74      * @param[in]  input0                    Input tensor containing the Matrix A. Data types supported: F16/F32
75      * @param[in]  input1                    Input tensor containing the Matrix B. Data type supported: same as @p input0
76      * @param[in]  input2                    Input tensor containing the Matrix C (bias). Can be nullptr. Data type supported: same as @p input0
77      * @param[out] output                    Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0
78      * @param[in]  alpha                     Weight of the matrix product
79      * @param[in]  beta                      (Optional) Weight of matrix C (bias). Default value is 0. Only beta = 1 is currently supported. NOTE(review): the default (0) differs from the only value documented as supported (1) - confirm how beta = 0 is handled by the implementation.
80      * @param[in]  is_interleaved_transposed (Optional) True if input0 and input1 have been reshaped respectively using @ref CLGEMMReshapeLHSMatrixKernel and @ref CLGEMMReshapeRHSMatrixKernel. Default value is true.
81      * @param[in]  reshape_info              (Optional) GEMM reshape info. If is_interleaved_transposed = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped
82      * @param[in]  fp_mixed_precision        (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy. Default value is false.
83      * @param[in]  activation_info           (Optional) Activation to apply after the matrix multiplication
84      *
85      */
86     void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta = 0.f,
87                    bool is_interleaved_transposed = true, const GEMMReshapeInfo &reshape_info = GEMMReshapeInfo(), bool fp_mixed_precision = false, const ActivationLayerInfo &activation_info = ActivationLayerInfo());
88     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixMultiplyKernel
89      *
90      * @param[in] input0                    Input tensor containing the Matrix A info. Data types supported: F16/F32
91      * @param[in] input1                    Input tensor containing the Matrix B info. Data type supported: same as @p input0
92      * @param[in] input2                    Input tensor containing the Matrix C (bias) info. Can be nullptr. Data type supported: same as @p input0
93      * @param[in] output                    Output tensor info. Data type supported: same as @p input0
94      * @param[in] alpha                     Weight of the matrix product
95      * @param[in] beta                      Weight of matrix C (bias). Only beta = 1 is currently supported. Unlike configure(), this parameter has no default value.
96      * @param[in] is_interleaved_transposed True if input0 and input1 have been reshaped respectively using @ref CLGEMMReshapeLHSMatrixKernel and @ref CLGEMMReshapeRHSMatrixKernel
97      * @param[in] reshape_info              GEMM reshape info. If is_interleaved_transposed = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped
98      * @param[in] gpu_target                GPU Target
99      * @param[in] fp_mixed_precision        (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy. Default value is false.
100      * @param[in] activation_info           (Optional) Activation to apply after the matrix multiplication
101      *
102      * @return a status
103      */
104     static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float alpha, float beta,
105                            bool is_interleaved_transposed, const GEMMReshapeInfo &reshape_info, GPUTarget gpu_target, bool fp_mixed_precision = false, const ActivationLayerInfo &activation_info = ActivationLayerInfo());
106 
107     // Inherited methods overridden:
108     void run(const Window &window, cl::CommandQueue &queue) override;
109 
110 public: // NOTE(review): the kernel state below is publicly accessible rather than private - confirm this is intentional
111     const ICLTensor *_input0; /**< Presumably the Matrix A tensor passed to configure() as input0; assigned outside this header - confirm */
112     const ICLTensor *_input1; /**< Presumably the Matrix B tensor passed to configure() as input1 - confirm */
113     const ICLTensor *_input2; /**< Presumably the Matrix C (bias) tensor passed to configure() as input2 - confirm */
114     ICLTensor       *_output; /**< Presumably the output tensor passed to configure() - confirm */
115     bool             _slide_matrix_b; /**< Meaning is defined by the implementation file, which is not visible here - TODO document */
116     bool             _reinterpret_input_as_3d; /**< Presumably derived from the GEMMReshapeInfo given to configure() - confirm */
117     bool             _reinterpret_output_as_3d; /**< Presumably derived from the GEMMReshapeInfo given to configure() - confirm */
118     bool             _add_bias; /**< Presumably true when Matrix C is added to the result - confirm */
119     bool             _broadcast_bias; /**< Presumably mirrors the "broadcast_bias" flag of GEMMReshapeInfo - confirm */
120 };
121 } // namespace arm_compute
122 #endif /* ARM_COMPUTE_CLGEMMMATRIXMULTIPLYKERNEL_H */
123