• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2017-2020 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H
25 #define ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H
26 
27 #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
28 #include "arm_compute/runtime/NEON/functions/NEPermute.h"
29 #include "arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h"
30 #include <memory>
31 
32 namespace arm_compute
33 {
34 // Forward declarations
35 class ITensor;
36 class NEDepthwiseConvolutionLayerNativeKernel;
37 
/** Function to execute a depthwise convolution.
 *
 * At configure() time this function selects one of two internal implementations
 * (see get_depthwiseconvolution_function()):
 *  - @ref NEDepthwiseConvolutionLayerOptimizedInternal, built around
 *    @ref NEDepthwiseConvolutionAssemblyDispatch, or
 *  - @ref NEDepthwiseConvolutionLayerGeneric, built around
 *    @ref NEDepthwiseConvolutionLayerNativeKernel.
 *
 * NOTE(review): this header names IFunction, IMemoryManager, MemoryGroup, Tensor,
 * Status, PadStrideInfo, ActivationLayerInfo, Size2D and DepthwiseConvolutionFunction
 * but does not include their headers directly — it relies on transitive includes
 * (e.g. via NEActivationLayer.h). Consider including them explicitly (IWYU).
 */
class NEDepthwiseConvolutionLayer : public IFunction
{
public:
    /** Default constructor
     *
     * @param[in] memory_manager (Optional) Memory manager used by the internal optimized path to
     *                           manage transient tensors. Defaults to nullptr (no manager).
     */
    NEDepthwiseConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEDepthwiseConvolutionLayer(const NEDepthwiseConvolutionLayer &) = delete;
    /** Default move constructor */
    NEDepthwiseConvolutionLayer(NEDepthwiseConvolutionLayer &&) = default;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEDepthwiseConvolutionLayer &operator=(const NEDepthwiseConvolutionLayer &) = delete;
    /** Default move assignment operator */
    NEDepthwiseConvolutionLayer &operator=(NEDepthwiseConvolutionLayer &&) = default;
    /** Default destructor */
    ~NEDepthwiseConvolutionLayer();
    /** Initialize the function's source, destination, weights and convolution information.
     *
     * @param[in, out] input            Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32
     * @param[in]      weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
     *                                  Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
     * @param[in]      biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
     *                                  Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
     * @param[out]     output           Destination tensor. Data type supported: same as @p input.
     * @param[in]      conv_info        Padding and stride information to use for the convolution.
     * @param[in]      depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
     * @param[in]      act_info         (Optional) Activation layer information in case of a fused activation.
     * @param[in]      dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     */
    void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
                   unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

    /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayer
     *
     * @param[in] input            Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32
     * @param[in] weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
     *                             Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
     * @param[in] biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
     *                             Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
     * @param[in] output           Destination tensor. Data type supported: same as @p input.
     * @param[in] conv_info        Padding and stride information to use for the convolution.
     * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
     * @param[in] act_info         (Optional) Activation layer information in case of a fused activation.
     * @param[in] dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                           unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

    // Inherited methods overridden:
    void run() override;
    void prepare() override;

private:
    /** Static function to choose the best depthwise convolution function for @ref NEDepthwiseConvolutionLayer
     *
     * @note NOTE(review): @p act_info is taken by value here while every sibling method takes it by
     *       const reference — aligning this with the out-of-line definition in the .cpp would be
     *       more consistent, but the declaration and definition must be changed together.
     *
     * @param[in] input            Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32
     * @param[in] weights          Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
     *                             Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
     * @param[in] biases           Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
     *                             Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
     * @param[in] output           Destination tensor info. Data type supported: same as @p input.
     * @param[in] conv_info        Padding and stride information to use for the convolution.
     * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
     * @param[in] act_info         (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 quantized are supported.
     * @param[in] dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     *
     * @return a Depthwise Convolution Function
     */
    static DepthwiseConvolutionFunction get_depthwiseconvolution_function(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
                                                                          const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
                                                                          ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

    /** Basic function to execute optimized depthwise convolution routines. This function calls the following NEON kernels:
    *
    * @note At the moment 3x3 and 5x5 convolution of stride 1, 2 are supported
    *
    * -# @ref NEFillBorderKernel (if pad_x or pad_y > 0) and no assembly kernel implementation is present
    * -# @ref NEDepthwiseConvolutionLayer3x3Kernel if 3x3 and no assembly kernel implementation is present
    * -# @ref NEDepthwiseConvolutionAssemblyDispatch if assembly kernel implementation is present
    * -# @ref NEDirectConvolutionLayerOutputStageKernel if re-quantization of output is required
    * -# @ref NEActivationLayer if fused activation is required
    *
    */
    class NEDepthwiseConvolutionLayerOptimizedInternal : public IFunction
    {
    public:
        /** Default constructor
         *
         * @param[in] memory_manager (Optional) Memory manager for transient tensors. Defaults to nullptr.
         */
        NEDepthwiseConvolutionLayerOptimizedInternal(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
        /** Prevent instances of this class from being copied (As this class contains pointers) */
        NEDepthwiseConvolutionLayerOptimizedInternal(const NEDepthwiseConvolutionLayerOptimizedInternal &) = delete;
        /** Default move constructor */
        NEDepthwiseConvolutionLayerOptimizedInternal(NEDepthwiseConvolutionLayerOptimizedInternal &&) = default;
        /** Prevent instances of this class from being copied (As this class contains pointers) */
        NEDepthwiseConvolutionLayerOptimizedInternal &operator=(const NEDepthwiseConvolutionLayerOptimizedInternal &) = delete;
        /** Default move assignment operator */
        NEDepthwiseConvolutionLayerOptimizedInternal &operator=(NEDepthwiseConvolutionLayerOptimizedInternal &&) = default;
        /** Default destructor */
        ~NEDepthwiseConvolutionLayerOptimizedInternal() = default;
        /** Initialize the function's source, destination, kernels and border_size.
         *
         * @param[in, out] input            Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling).
         * @param[in]      weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
         * @param[in]      biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
         *                                  Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
         * @param[out]     output           Destination tensor. Data type supported: same as @p input.
         * @param[in]      conv_info        Padding and stride information to use for the convolution.
         * @param[in]      depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
         * @param[in]      act_info         (Optional) Activation layer information in case of a fused activation.
         * @param[in]      dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
         */
        void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
                       unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

        /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayerOptimizedInternal
         *
         * @param[in] input            Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling).
         * @param[in] weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
         * @param[in] biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
         *                             Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
         * @param[in] output           Destination tensor. Data type supported: same as @p input.
         * @param[in] conv_info        Padding and stride information to use for the convolution.
         * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
         * @param[in] act_info         (Optional) Activation layer information in case of a fused activation.
         * @param[in] dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
         *
         * @return a status
         */
        static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                               unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

        // Inherited methods overridden:
        void run() override;
        void prepare() override;

    private:
        MemoryGroup                            _memory_group;              /**< Memory group managing the transient tensors below */
        NEDepthwiseConvolutionAssemblyDispatch _dwc_optimized_func;        /**< Assembly-dispatch convolution function */
        NEPermute                              _permute_input;             /**< Layout permute for the input (presumably NCHW<->NHWC; confirm in .cpp) */
        NEPermute                              _permute_weights;           /**< Layout permute for the weights */
        NEPermute                              _permute_output;            /**< Layout permute for the output */
        NEActivationLayer                      _activationlayer_function;  /**< Fused activation, run when _is_activationlayer_enabled */
        Tensor                                 _accumulator;               /**< Intermediate accumulation tensor */
        Tensor                                 _permuted_input;            /**< Permuted copy of the input */
        Tensor                                 _permuted_weights;          /**< Permuted copy of the weights */
        Tensor                                 _permuted_output;           /**< Permuted copy of the output */
        const ITensor                         *_original_weights;         /**< Unmodified weights as passed to configure() (presumably used by prepare(); confirm in .cpp) */
        bool                                   _has_bias;                  /**< True if a biases tensor was provided */
        bool                                   _is_quantized;              /**< True for quantized (QASYMM8/QASYMM8_SIGNED) execution */
        bool                                   _is_nchw;                   /**< True if tensors use the NCHW data layout */
        bool                                   _permute;                   /**< True if layout permutation is required */
        bool                                   _is_activationlayer_enabled;/**< True if a fused activation must run */
        bool                                   _is_prepared;               /**< True once prepare() has completed */
    };

    /** Basic function to execute a generic depthwise convolution. This function calls the following NEON kernel:
     *
     * -# @ref NEDepthwiseConvolutionLayerNativeKernel
     *
     */
    class NEDepthwiseConvolutionLayerGeneric : public IFunction
    {
    public:
        /** Default constructor */
        NEDepthwiseConvolutionLayerGeneric();
        /** Prevent instances of this class from being copied (As this class contains pointers) */
        NEDepthwiseConvolutionLayerGeneric(const NEDepthwiseConvolutionLayerGeneric &) = delete;
        /** Default move constructor */
        NEDepthwiseConvolutionLayerGeneric(NEDepthwiseConvolutionLayerGeneric &&) = default;
        /** Prevent instances of this class from being copied (As this class contains pointers) */
        NEDepthwiseConvolutionLayerGeneric &operator=(const NEDepthwiseConvolutionLayerGeneric &) = delete;
        /** Default move assignment operator */
        NEDepthwiseConvolutionLayerGeneric &operator=(NEDepthwiseConvolutionLayerGeneric &&) = default;
        /** Default destructor */
        ~NEDepthwiseConvolutionLayerGeneric() = default;
        /** Initialize the function's source, destination, weights and convolution information.
         *
         * @param[in, out] input            Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling).
         * @param[in]      weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
         *                                  Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
         * @param[in]      biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
         *                                  Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
         * @param[out]     output           Destination tensor. Data type supported: same as @p input.
         * @param[in]      conv_info        Padding and stride information to use for the convolution.
         * @param[in]      depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
         * @param[in]      act_info         (Optional) Activation layer information in case of a fused activation.
         * @param[in]      dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
         */
        void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
                       unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

        /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayerGeneric
         *
         * @param[in] input            Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling).
         * @param[in] weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
         *                             Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
         * @param[in] biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
         *                             Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
         * @param[in] output           Destination tensor. Data type supported: same as @p input.
         * @param[in] conv_info        Padding and stride information to use for the convolution.
         * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
         * @param[in] act_info         (Optional) Activation layer information in case of a fused activation.
         * @param[in] dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
         *
         * @return a status
         */
        static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                               unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

        // Inherited methods overridden:
        void run() override;
        void prepare() override;

    private:
        std::unique_ptr<NEDepthwiseConvolutionLayerNativeKernel> _depthwise_conv_kernel;     /**< Native depthwise kernel (forward-declared; see class destructor note in .cpp) */
        NEPermute                                                _permute_input;             /**< Layout permute for the input (presumably NCHW<->NHWC; confirm in .cpp) */
        NEPermute                                                _permute_weights;           /**< Layout permute for the weights */
        NEPermute                                                _permute_output;            /**< Layout permute for the output */
        NEActivationLayer                                        _activationlayer_function;  /**< Fused activation, run when _is_activationlayer_enabled */
        Tensor                                                   _permuted_input;            /**< Permuted copy of the input */
        Tensor                                                   _permuted_weights;          /**< Permuted copy of the weights */
        Tensor                                                   _permuted_output;           /**< Permuted copy of the output */
        bool                                                     _is_prepared;               /**< True once prepare() has completed */
        bool                                                     _is_nchw;                   /**< True if tensors use the NCHW data layout */
        bool                                                     _is_activationlayer_enabled;/**< True if a fused activation must run */
        const ITensor                                           *_original_weights;          /**< Unmodified weights as passed to configure() (presumably used by prepare(); confirm in .cpp) */
    };

    DepthwiseConvolutionFunction                 _depth_conv_func; /**< Which internal implementation configure() selected */
    NEDepthwiseConvolutionLayerOptimizedInternal _func_optimized;  /**< Assembly-optimized implementation */
    NEDepthwiseConvolutionLayerGeneric           _func_generic;    /**< Generic native-kernel implementation */
};
272 } // namespace arm_compute
273 #endif /* ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H */