• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef ARM_COMPUTE_CLCONV2D_H
25 #define ARM_COMPUTE_CLCONV2D_H
26 
27 #include "arm_compute/core/Types.h"
28 #include "arm_compute/runtime/FunctionDescriptors.h"
29 #include "src/gpu/cl/ClCompileContext.h"
30 #include "src/gpu/cl/IClKernel.h"
31 #include "src/gpu/cl/IClOperator.h"
32 
33 namespace arm_compute
34 {
35 namespace opencl
36 {
37 /** Basic function to compute the convolution layer. This function calls the following OpenCL kernels/functions:
38  *
39  * -# @ref opencl::ClGemmConv2d
40  * -# @ref opencl::ClWinogradConv2d
41  * -# @ref opencl::ClIndirectConv2d
42  * -# @ref opencl::ClDirectConv2d
43  * -# @ref CLFFTConvolutionLayer
44  *
45  * The function selects one of the algorithms mentioned above based on:
46  *      - The size of the kernel
47  *      - Number of src/dst feature maps
48  *      - Amount of memory needed
49  *
50  * Generally GEMM-based convolution is executed when neither Winograd nor FFT nor Direct convolution can be performed.
51  *
52  * FP32 Algorithm| Filter Size                                                 |   Input/Output feature maps               |
53  * --------------|-------------------------------------------------------------|-------------------------------------------|
54  * Winograd      | 3x3 1x3 3x1 5x1 1x5 5x5(fast maths) 7x1 1x7                 |  Input channels is greater than 3         |
55  * FFT           | Squared kernels and greater than 9x9                        |  Input feature maps > Output feature maps |
56  * DirectConv    | 9x9                                                         |                                           |
57  * GEMM          | Any size                                                    |                                           |
58  *
59  * Winograd 5x5 requires fast maths enabled.
60  *
61  * FP16 Algorithm| Filter Size                |   Input/Output feature maps               |
62  * --------------|----------------------------|-------------------------------------------|
63  * Winograd      | 3x3 1x3 3x1 5x1 1x5 5x5    |  Input channels is greater than 3         |
64  * FFT           | Not supported              |                                           |
65  * DirectConv    | 9x9                        |                                           |
66  * GEMM          | Any size                   |                                           |
67  *
68  * Winograd FP16 requires fast maths enabled.
69  *
70  */
71 class ClConv2d : public IClOperator
72 {
73 public:
74     /** Default constructor */
75     ClConv2d();
76     /** Default Destructor */
77     ~ClConv2d();
78     /** Prevent instances of this class from being copied (As this class contains pointers) */
79     ClConv2d(const ClConv2d &) = delete;
80     /** Default move constructor */
81     ClConv2d(ClConv2d &&) = default;
82     /** Prevent instances of this class from being copied (As this class contains pointers) */
83     ClConv2d &operator=(const ClConv2d &) = delete;
84     /** Default move assignment operator */
85     ClConv2d &operator=(ClConv2d &&) = default;
86     /** Set the src and dst tensors.
87      *
88      * Valid data layouts:
89      * - NHWC
90      * - NCHW
91      *
92      * Valid data type configurations:
93      * |src0           |src1               |src2   |dst            |
94      * |:--------------|:------------------|:------|:--------------|
95      * |F16            |F16                |F16    |F16            |
96      * |F32            |F32                |F32    |F32            |
97      * |QASYMM8        |QASYMM8            |S32    |QASYMM8        |
98      * |QASYMM8        |QSYMM8_PER_CHANNEL |S32    |QASYMM8        |
99      * |QASYMM8_SIGNED |QASYMM8_SIGNED     |S32    |QASYMM8_SIGNED |
100      * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32    |QASYMM8_SIGNED |
101      *
102      * @param[in]  compile_context The compile context to be used.
103      * @param[in]  src             Source tensor info. 3 lower dimensions represent a single src [width, height, IFM],
104      *                             while every optional dimension from 4 and above represent a batch of srcs.
105      *                             Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
106      * @param[in]  weights         Weights tensor info. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
107      *                             Data type supported: Same as @p src, also could be QSYMM8_PER_CHANNEL if src is QASYMM8/QASYMM8_SIGNED.
108      * @param[in]  biases          Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
109      *                             Data type supported: Same as @p src, except for src of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
110      * @param[out] dst             Destination tensor info. 3 lower dimensions represent a single dst [width, height, OFM], while the rest represent batch of dsts.
111      *                             Data types supported: Same as @p src.
112      * @param[in]  conv2d_info     Contains convolution 2d info described in @ref Conv2dInfo.
113      * @param[in]  weights_info    Specifies if the weights tensor has been reshaped with CLWeightsReshapeKernel. Data type supported: Same as @p src.
114      */
115     void configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst, const Conv2dInfo &conv2d_info,
116                    const WeightsInfo &weights_info = WeightsInfo());
117     /** Static function to check if given info will lead to a valid configuration of @ref ClConv2d
118      *
119      * Similar to ClConv2d::configure()
120      *
121      * @return a status
122      */
123     static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const Conv2dInfo &conv2d_info,
124                            const WeightsInfo &weights_info = WeightsInfo());
125     /** Static function to check if given info will return the convolution called by @ref ClConv2d
126      *
127      * @param[in] src          Source tensor. 3 lower dimensions represent a single src [width, height, IFM],
128      *                         while every optional dimension from 4 and above represent a batch of srcs.
129      *                         Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
130      * @param[in] weights      Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
131      *                         Data type supported: Same as @p src, also could be QSYMM8_PER_CHANNEL if src is QASYMM8/QASYMM8_SIGNED.
132      * @param[in] dst          Destination tensor. 3 lower dimensions represent a single dst [width, height, OFM], while the rest represent batch of dsts.
133      *                         Data types supported: Same as @p src.
134      * @param[in] conv2d_info  Contains convolution 2d info described in @ref Conv2dInfo.
135      * @param[in] weights_info Specifies if the weights tensor has been reshaped with CLWeightsReshapeKernel.
136      * @param[in] gpu_target   Specifies the @p GPUTarget.
137      *
138      * @return the Convolution Method Hint
139      */
140     static ConvolutionMethod get_convolution_method(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *dst, const Conv2dInfo &conv2d_info,
141                                                     const WeightsInfo &weights_info, const GPUTarget gpu_target);
142     // Inherited methods overridden:
143     void run(ITensorPack &tensors) override;
144     void prepare(ITensorPack &tensors) override;
145     experimental::MemoryRequirements workspace() const override;
146 
147 private:
148     std::unique_ptr<IClOperator>     _operator;
149     experimental::MemoryRequirements _aux_mem{};
150 };
151 } // namespace opencl
152 } // namespace arm_compute
153 #endif /* ARM_COMPUTE_CLCONV2D_H */
154