/*
 * Copyright (c) 2019-2021 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef ARM_COMPUTE_CLFFTCONVOLUTIONLAYER_H
#define ARM_COMPUTE_CLFFTCONVOLUTIONLAYER_H

#include "arm_compute/runtime/IFunction.h"

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
#include "arm_compute/runtime/CL/functions/CLElementwiseOperations.h"
#include "arm_compute/runtime/CL/functions/CLFFT2D.h"
#include "arm_compute/runtime/CL/functions/CLPadLayer.h"
#include "arm_compute/runtime/CL/functions/CLPermute.h"
#include "arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h"
#include "arm_compute/runtime/CL/functions/CLReductionOperation.h"
#include "arm_compute/runtime/CL/functions/CLReshapeLayer.h"
#include "arm_compute/runtime/CL/functions/CLReverse.h"
#include "arm_compute/runtime/CL/functions/CLSlice.h"

namespace arm_compute
{
// Forward declarations
class ICLTensor;

/** Basic function to execute FFT-based convolution on OpenCL. This function calls the following OpenCL functions/kernels:
 *
 *  -# @ref CLPermute                        Permute input if NHWC (only NCHW is supported internally).
 *  -# @ref CLPadLayer                       Pad input.
 *  -# @ref CLFFT2D                          Forward transform to the frequency domain.
 *  -# @ref CLComplexPixelWiseMultiplication Complex element-wise product of the input and the weights.
 *  -# @ref CLReductionOperation             Reduction across channels.
 *  -# @ref CLFFT2D                          Inverse transform back to the spatial domain.
 *  -# @ref CLStridedSlice                   Extract valid output.
 *  -# @ref CLArithmeticAddition             Add bias.
 *  -# @ref CLActivationLayer                Perform activation.
 *  -# @ref CLPermute                        Permute output if NHWC (only NCHW is supported internally).
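 *
 * A minimal usage sketch (illustrative only; the tensors and the @ref PadStrideInfo values
 * below are assumptions, not taken from this header):
 *
 * @code
 * CLScheduler::get().default_init(); // Initialise the OpenCL scheduler once per application
 * CLFFTConvolutionLayer fft_conv;
 * // src, weights, biases and dst are assumed to be CLTensor objects with initialised TensorInfo
 * fft_conv.configure(&src, &weights, &biases, &dst, PadStrideInfo(1, 1, 2, 2));
 * src.allocator()->allocate();
 * weights.allocator()->allocate();
 * biases.allocator()->allocate();
 * dst.allocator()->allocate();
 * // ... fill src, weights and biases with data ...
 * fft_conv.run();
 * @endcode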
 */
class CLFFTConvolutionLayer : public IFunction
{
public:
    /** Default constructor */
    CLFFTConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    CLFFTConvolutionLayer(const CLFFTConvolutionLayer &) = delete;
    /** Default move constructor */
    CLFFTConvolutionLayer(CLFFTConvolutionLayer &&) = default;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    CLFFTConvolutionLayer &operator=(const CLFFTConvolutionLayer &) = delete;
    /** Default move assignment operator */
    CLFFTConvolutionLayer &operator=(CLFFTConvolutionLayer &&) = default;
    /** Set the input and output tensors.
     *
     * Valid data layouts:
     * - All
     *
     * Valid data type configurations:
     * |src    |dst    |
     * |:------|:------|
     * |F32    |F32    |
     * |F16    |F16    |
     *
     * @note This function only supports square kernel sizes and unit strides, for both NCHW and NHWC data layouts.
     *
     * @param[in]  input            Source tensor. The 3 lower dimensions represent a single input [width, height, IFM],
     *                              while every optional dimension from 4 and above represents a batch of inputs.
     *                              Data types supported: F16/F32.
     * @param[in]  weights          Weights tensor. Weights are a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input.
     * @param[in]  biases           Biases tensor. Shared biases are supported. Biases are a 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[out] output           Destination tensor. The 3 lower dimensions represent a single output [width, height, OFM], while the rest represent a batch of outputs.
     *                              Data types supported: Same as @p input.
     * @param[in]  conv_info        Contains padding and stride information, described in @ref PadStrideInfo.
     * @param[in]  act_info         (Optional) Activation layer information in case of a fused activation.
     * @param[in]  enable_fast_math (Optional) Enable fast math computation. If this flag is set, the function may dispatch the fastest implementation
     *                              available, which can also reduce accuracy. Default is false.
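     *
     * A hedged sketch of tensor initialisation matching the dimension order documented above
     * (the concrete shapes and tensor names are illustrative assumptions, not part of this API):
     *
     * @code
     * // NCHW layout: input [width, height, IFM, batches], weights [kernel_x, kernel_y, IFM, OFM]
     * src.allocator()->init(TensorInfo(TensorShape(64U, 64U, 3U, 1U), 1, DataType::F32));
     * weights.allocator()->init(TensorInfo(TensorShape(5U, 5U, 3U, 16U), 1, DataType::F32));
     * biases.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));
     * dst.allocator()->init(TensorInfo(TensorShape(64U, 64U, 16U, 1U), 1, DataType::F32));
     * @endcode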
     */
    void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
                   const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false);
    /** Set the input and output tensors.
     *
     * @note This function only supports square kernel sizes and unit strides, for both NCHW and NHWC data layouts.
     *
     * @param[in]  compile_context  The compile context to be used.
     * @param[in]  input            Source tensor. The 3 lower dimensions represent a single input [width, height, IFM],
     *                              while every optional dimension from 4 and above represents a batch of inputs.
     *                              Data types supported: F16/F32.
     * @param[in]  weights          Weights tensor. Weights are a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input.
     * @param[in]  biases           Biases tensor. Shared biases are supported. Biases are a 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[out] output           Destination tensor. The 3 lower dimensions represent a single output [width, height, OFM], while the rest represent a batch of outputs.
     *                              Data types supported: Same as @p input.
     * @param[in]  conv_info        Contains padding and stride information, described in @ref PadStrideInfo.
     * @param[in]  act_info         (Optional) Activation layer information in case of a fused activation.
     * @param[in]  enable_fast_math (Optional) Enable fast math computation. If this flag is set, the function may dispatch the fastest implementation
     *                              available, which can also reduce accuracy. Default is false.
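     *
     * A hedged sketch of this overload; how the @ref CLCompileContext is obtained is an
     * assumption and may differ between library versions:
     *
     * @code
     * // compile_ctx might come from e.g. CLKernelLibrary::get().get_compile_context() (version-dependent assumption)
     * CLFFTConvolutionLayer fft_conv;
     * fft_conv.configure(compile_ctx, &src, &weights, &biases, &dst, PadStrideInfo(1, 1, 2, 2));
     * @endcode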
     */
    void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
                   const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false);
    /** Static function to check if the given info will lead to a valid configuration of @ref CLFFTConvolutionLayer
     *
     * @note This function only supports square kernel sizes and unit strides, for both NCHW and NHWC data layouts.
     *
     * @param[in]  input            Source tensor info. The 3 lower dimensions represent a single input [width, height, IFM],
     *                              while every optional dimension from 4 and above represents a batch of inputs.
     *                              Data types supported: F16/F32.
     * @param[in]  weights          Weights tensor info. Weights are a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input.
     * @param[in]  biases           Biases tensor info. Shared biases are supported. Biases are a 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[in]  output           Destination tensor info. The 3 lower dimensions represent a single output [width, height, OFM], while the rest represent a batch of outputs.
     *                              Data types supported: Same as @p input.
     * @param[in]  conv_info        Contains padding and stride information, described in @ref PadStrideInfo.
     * @param[in]  act_info         (Optional) Activation layer information in case of a fused activation.
     * @param[in]  enable_fast_math (Optional) Enable fast math computation. If this flag is set, the function may dispatch the fastest implementation
     *                              available, which can also reduce accuracy. Default is false.
     *
     * @return a status
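     *
     * A hedged sketch of how the check might be used (the ITensorInfo objects are assumed to be
     * populated elsewhere; they are not part of this header):
     *
     * @code
     * Status s = CLFFTConvolutionLayer::validate(&src_info, &weights_info, &biases_info, &dst_info,
     *                                            PadStrideInfo(1, 1, 2, 2));
     * if(s.error_code() != ErrorCode::OK)
     * {
     *     // Configuration not supported by the FFT convolution path
     * }
     * @endcode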
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                           const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false);

    // Inherited methods overridden:
    void run() override;
    void prepare() override;

private:
    MemoryGroup                      _memory_group;
    CLReverse                        _flip_weights_func;
    CLPermute                        _permute_input_func;
    CLPermute                        _permute_output_func;
    CLPermute                        _permute_weights_func;
    CLPermute                        _permute_bias_func;
    CLPadLayer                       _pad_input_func;
    CLPadLayer                       _pad_weights_func;
    CLFFT2D                          _transform_input_func;
    std::unique_ptr<CLFFT2D>         _transform_weights_func;
    CLFFT2D                          _itransform_output_func;
    CLComplexPixelWiseMultiplication _prod_func;
    CLReductionOperation             _reduce_func;
    CLSlice                          _extract_output_func;
    CLArithmeticAddition             _bias_add_func;
    CLActivationLayer                _activation_layer_func;

    CLTensor _permuted_input;
    CLTensor _permuted_weights;
    CLTensor _permuted_bias;
    CLTensor _permuted_output;
    CLTensor _padded_input;
    CLTensor _padded_weights;
    CLTensor _flip_axis;
    CLTensor _flipped_weights;
    CLTensor _transformed_input;
    CLTensor _transformed_weights;
    CLTensor _input_weights_product;
    CLTensor _output_product;
    CLTensor _output_reduced;
    CLTensor _itransformed_output;
    CLTensor _reshaped_output;
    CLTensor _bias_output;

    const ICLTensor *_original_weights;
    const ICLTensor *_original_bias;
    bool             _is_activationlayer_enabled;
    bool             _needs_permute;
    bool             _has_bias;
    bool             _is_prepared;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLFFTCONVOLUTIONLAYER_H */