• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2017-2020 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h"
25 
26 #include "arm_compute/core/utils/misc/InfoHelpers.h"
27 #include "arm_compute/core/utils/misc/ShapeCalculator.h"
28 #include "arm_compute/core/utils/quantization/AsymmHelpers.h"
29 #include "arm_compute/runtime/NEON/NEScheduler.h"
30 #include "src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h"
31 #include "support/MemorySupport.h"
32 
33 using namespace arm_compute::misc;
34 using namespace arm_compute::misc::shape_calculator;
35 
36 namespace arm_compute
37 {
38 namespace
39 {
validate_arguments_optimized(const ITensorInfo * input,const ITensorInfo * weights,const ITensorInfo * biases,const ITensorInfo * output,const PadStrideInfo & conv_info,unsigned int depth_multiplier,const ActivationLayerInfo & act_info,const Size2D & dilation)40 Status validate_arguments_optimized(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
41                                     unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
42 {
43     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
44     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
45     if(!is_data_type_quantized_per_channel(weights->data_type()))
46     {
47         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
48     }
49     ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() == DataLayout::UNKNOWN);
50     ARM_COMPUTE_RETURN_ERROR_ON(dilation.x() < 1 || dilation.y() < 1);
51     const size_t idx_w = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH);
52     const size_t idx_h = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT);
53     ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) + (weights->dimension(idx_w) - 1) * (dilation.x() - 1) > input->dimension(idx_w) + conv_info.pad_left() + conv_info.pad_right());
54     ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_h) + (weights->dimension(idx_h) - 1) * (dilation.y() - 1) > input->dimension(idx_h) + conv_info.pad_top() + conv_info.pad_bottom());
55 
56     if(biases != nullptr)
57     {
58         const unsigned int channel_idx = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL);
59         ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
60         ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != weights->dimension(channel_idx));
61     }
62 
63     ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseConvolutionAssemblyDispatch::validate(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation));
64 
65     //Validate Activation Layer
66     if(act_info.enabled())
67     {
68         ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(output, nullptr, act_info));
69     }
70     return Status{};
71 }
72 } // namespace
73 
74 NEDepthwiseConvolutionLayer::~NEDepthwiseConvolutionLayer() = default;
75 
/** Constructor: shares the memory manager between the memory group and the
 *  assembly-dispatch function, and default-initializes all members/flags. */
NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::NEDepthwiseConvolutionLayerOptimizedInternal(std::shared_ptr<IMemoryManager> memory_manager)
    : _memory_group(memory_manager), _dwc_optimized_func(memory_manager), _permute_input(), _permute_weights(), _permute_output(), _activationlayer_function(), _accumulator(), _permuted_input(),
      _permuted_weights(), _permuted_output(), _original_weights(nullptr), _has_bias(false), _is_quantized(false), _is_nchw(true), _permute(false), _is_activationlayer_enabled(false), _is_prepared(false)
{
}
81 
configure(ITensor * input,const ITensor * weights,const ITensor * biases,ITensor * output,const PadStrideInfo & conv_info,unsigned int depth_multiplier,const ActivationLayerInfo & act_info,const Size2D & dilation)82 void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::configure(ITensor       *input,
83                                                                                           const ITensor *weights,
84                                                                                           const ITensor *biases,
85                                                                                           ITensor *output, const PadStrideInfo &conv_info,
86                                                                                           unsigned int               depth_multiplier,
87                                                                                           const ActivationLayerInfo &act_info,
88                                                                                           const Size2D              &dilation)
89 {
90     ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
91     // Perform validation step
92     ARM_COMPUTE_ERROR_THROW_ON(NEDepthwiseConvolutionLayerOptimizedInternal::validate(input->info(), weights->info(), (biases == nullptr) ? nullptr : biases->info(),
93                                                                                       output->info(), conv_info, depth_multiplier, act_info, dilation));
94 
95     _original_weights           = weights;
96     _is_quantized               = is_data_type_quantized_asymmetric(input->info()->data_type());
97     _has_bias                   = biases != nullptr;
98     _is_nchw                    = input->info()->data_layout() == DataLayout::NCHW;
99     _permute                    = _is_nchw;
100     _is_prepared                = false;
101     _is_activationlayer_enabled = act_info.enabled();
102 
103     // Configure pipeline
104     ActivationLayerInfo act_info_to_use = ActivationLayerInfo();
105     const bool          is_relu         = arm_compute::utils::info_helpers::is_relu(act_info);
106     const bool          is_relu6        = arm_compute::utils::info_helpers::is_relu6(act_info);
107     _is_activationlayer_enabled         = act_info.enabled() && !(is_relu || is_relu6);
108     if(!_is_activationlayer_enabled)
109     {
110         act_info_to_use = act_info;
111     }
112 
113     if(_is_nchw)
114     {
115         _memory_group.manage(&_permuted_input);
116         _memory_group.manage(&_permuted_output);
117 
118         // Configure the function to transform the input tensor from NCHW -> NHWC
119         _permute_input.configure(input, &_permuted_input, PermutationVector(2U, 0U, 1U));
120         _permuted_input.info()->set_data_layout(DataLayout::NHWC);
121 
122         // Configure the function to transform the weights tensor from IHW -> HWI
123         _permute_weights.configure(weights, &_permuted_weights, PermutationVector(2U, 0U, 1U));
124         _permuted_weights.info()->set_data_layout(DataLayout::NHWC);
125 
126         _permuted_output.info()->set_data_layout(DataLayout::NHWC);
127         _permuted_output.info()->set_quantization_info(output->info()->quantization_info());
128 
129         // Configure optimized depthwise
130         _dwc_optimized_func.configure(&_permuted_input, &_permuted_weights, biases, &_permuted_output, conv_info, depth_multiplier, act_info_to_use, dilation);
131 
132         // Configure the function to transform the convoluted output to ACL's native ordering format NCHW
133         _permuted_output.info()->set_data_layout(DataLayout::NHWC);
134         _permute_output.configure(&_permuted_output, output, PermutationVector(1U, 2U, 0U));
135 
136         // Allocate tensors
137         _permuted_input.allocator()->allocate();
138         _permuted_output.allocator()->allocate();
139     }
140     else
141     {
142         _dwc_optimized_func.configure(input, weights, biases, output, conv_info, depth_multiplier, act_info_to_use, dilation);
143     }
144 
145     // Configure activation
146     if(_is_activationlayer_enabled)
147     {
148         _activationlayer_function.configure(output, nullptr, act_info);
149     }
150 }
151 
validate(const ITensorInfo * input,const ITensorInfo * weights,const ITensorInfo * biases,const ITensorInfo * output,const PadStrideInfo & conv_info,unsigned int depth_multiplier,const ActivationLayerInfo & act_info,const Size2D & dilation)152 Status NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::validate(const ITensorInfo         *input,
153                                                                                            const ITensorInfo         *weights,
154                                                                                            const ITensorInfo         *biases,
155                                                                                            const ITensorInfo         *output,
156                                                                                            const PadStrideInfo       &conv_info,
157                                                                                            unsigned int               depth_multiplier,
158                                                                                            const ActivationLayerInfo &act_info,
159                                                                                            const Size2D              &dilation)
160 {
161     return validate_arguments_optimized(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
162 }
163 
run()164 void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::run()
165 {
166     prepare();
167 
168     MemoryGroupResourceScope scope_mg(_memory_group);
169 
170     // Permute input
171     if(_permute)
172     {
173         _permute_input.run();
174     }
175 
176     // Run assembly function
177     _dwc_optimized_func.run();
178 
179     // Permute output
180     if(_is_nchw)
181     {
182         _permute_output.run();
183     }
184 
185     // Run activation
186     if(_is_activationlayer_enabled)
187     {
188         _activationlayer_function.run();
189     }
190 }
191 
prepare()192 void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::prepare()
193 {
194     if(!_is_prepared)
195     {
196         // Permute weights
197         if(_permute)
198         {
199             _permuted_weights.allocator()->allocate();
200             _permute_weights.run();
201             _original_weights->mark_as_unused();
202         }
203 
204         // Prepare optimized function
205         _dwc_optimized_func.prepare();
206         if(!_permuted_weights.is_used())
207         {
208             _permuted_weights.allocator()->free();
209         }
210 
211         _is_prepared = true;
212     }
213 }
214 
/** Constructor: default-initializes all members; the native kernel is created
 *  later in configure(). */
NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::NEDepthwiseConvolutionLayerGeneric()
    : _depthwise_conv_kernel(), _permute_input(), _permute_weights(), _permute_output(), _activationlayer_function(), _permuted_input(), _permuted_weights(), _permuted_output(), _is_prepared(false),
      _is_nchw(false), _is_activationlayer_enabled(false), _original_weights(nullptr)
{
}
220 
/** Configure the generic (native-kernel) depthwise convolution path.
 *
 * The native kernel operates on NHWC tensors. For NCHW inputs, the input,
 * weights and output are permuted to/from NHWC around the kernel; for NHWC
 * inputs the user tensors are used directly and no prepare step is needed.
 */
void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
                                                                                unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
    ARM_COMPUTE_ERROR_THROW_ON(NEDepthwiseConvolutionLayer::validate(input->info(), weights->info(), (biases == nullptr) ? nullptr : biases->info(),
                                                                     output->info(), conv_info, depth_multiplier, act_info, dilation));

    _is_nchw     = input->info()->data_layout() == DataLayout::NCHW;
    // NHWC needs no weight permutation, so there is nothing left to prepare
    _is_prepared = !_is_nchw;

    ITensor       *input_to_use   = input;
    const ITensor *weights_to_use = weights;
    ITensor       *output_to_use  = output;
    if(_is_nchw)
    {
        // Transform the input tensor from NCHW -> NHWC
        _permute_input.configure(input, &_permuted_input, PermutationVector(2U, 0U, 1U));
        _permuted_input.info()->set_data_layout(DataLayout::NHWC);
        input_to_use = &_permuted_input;

        // Transform the weights tensor from IHW -> HWI
        _permute_weights.configure(weights, &_permuted_weights, PermutationVector(2U, 0U, 1U));
        _permuted_weights.info()->set_data_layout(DataLayout::NHWC);
        weights_to_use = &_permuted_weights;

        // Empty shape: the kernel auto-initializes the permuted output's info
        _permuted_output.allocator()->init(output->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(TensorShape()));
        output_to_use = &_permuted_output;
    }
    // NOTE(review): in the NCHW path this stores the *permuted* weights (which
    // prepare() later marks as unused), not the caller's tensor — confirm this
    // is intentional.
    _original_weights = weights_to_use;

    _depthwise_conv_kernel = arm_compute::support::cpp14::make_unique<NEDepthwiseConvolutionLayerNativeKernel>();
    _depthwise_conv_kernel->configure(input_to_use, weights_to_use, biases, output_to_use, conv_info, depth_multiplier, dilation);

    if(_is_nchw)
    {
        // Transform the kernel's NHWC result back to the caller's NCHW output
        _permute_output.configure(&_permuted_output, output, PermutationVector(1U, 2U, 0U));
        _permuted_output.info()->set_data_layout(DataLayout::NHWC);

        _permuted_input.allocator()->allocate();
        _permuted_weights.allocator()->allocate();
        _permuted_output.allocator()->allocate();
    }

    //Configure Activation Layer
    _is_activationlayer_enabled = act_info.enabled();
    if(_is_activationlayer_enabled)
    {
        _activationlayer_function.configure(output, nullptr, act_info);
    }
}
269 
validate(const ITensorInfo * input,const ITensorInfo * weights,const ITensorInfo * biases,const ITensorInfo * output,const PadStrideInfo & conv_info,unsigned int depth_multiplier,const ActivationLayerInfo & act_info,const Size2D & dilation)270 Status NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
271                                                                                  const PadStrideInfo &conv_info,
272                                                                                  unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
273 {
274     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
275     if(input->data_layout() == DataLayout::NCHW)
276     {
277         TensorShape permuted_input_shape   = input->tensor_shape();
278         TensorShape permuted_weights_shape = weights->tensor_shape();
279         TensorShape permuted_output_shape  = misc::shape_calculator::compute_depthwise_convolution_shape(*input, *weights, conv_info, depth_multiplier, dilation);
280         permute(permuted_input_shape, PermutationVector(2U, 0U, 1U));
281         permute(permuted_weights_shape, PermutationVector(2U, 0U, 1U));
282         permute(permuted_output_shape, PermutationVector(2U, 0U, 1U));
283 
284         const TensorInfo permuted_input   = TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_input_shape).set_data_layout(DataLayout::NHWC));
285         const TensorInfo permuted_weights = TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_weights_shape).set_data_layout(DataLayout::NHWC));
286         const TensorInfo permuted_output  = TensorInfo(output->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_output_shape).set_data_layout(DataLayout::NCHW));
287 
288         ARM_COMPUTE_RETURN_ON_ERROR(NEPermute::validate(input, &permuted_input, PermutationVector(2U, 0U, 1U)));
289         ARM_COMPUTE_RETURN_ON_ERROR(NEPermute::validate(weights, &permuted_weights, PermutationVector(2U, 0U, 1U)));
290         ARM_COMPUTE_RETURN_ON_ERROR(NEPermute::validate(&permuted_output, output, PermutationVector(1U, 2U, 0U)));
291 
292         ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseConvolutionLayerNativeKernel::validate(&permuted_input, &permuted_weights, biases, &permuted_output, conv_info, depth_multiplier, dilation));
293     }
294     else
295     {
296         ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseConvolutionLayerNativeKernel::validate(input, weights, biases, output, conv_info, depth_multiplier, dilation));
297     }
298 
299     // Validate Activation Layer
300     if(act_info.enabled())
301     {
302         ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(output, nullptr, act_info));
303     }
304 
305     return Status{};
306 }
307 
run()308 void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::run()
309 {
310     if(_is_nchw)
311     {
312         prepare();
313         _permute_input.run();
314     }
315 
316     NEScheduler::get().schedule(_depthwise_conv_kernel.get(), Window::DimY);
317 
318     if(_is_nchw)
319     {
320         _permute_output.run();
321     }
322 
323     if(_is_activationlayer_enabled)
324     {
325         _activationlayer_function.run();
326     }
327 }
328 
prepare()329 void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::prepare()
330 {
331     if(!_is_prepared)
332     {
333         ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
334 
335         _permute_weights.run();
336         _original_weights->mark_as_unused();
337         _is_prepared = true;
338     }
339 }
340 
/** Constructor: defaults to the generic function until configure() picks the
 *  best path; the memory manager is forwarded to the optimized internal. */
NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
    : _depth_conv_func(DepthwiseConvolutionFunction::GENERIC), _func_optimized(std::move(memory_manager)), _func_generic()
{
}
345 
configure(ITensor * input,const ITensor * weights,const ITensor * biases,ITensor * output,const PadStrideInfo & conv_info,unsigned int depth_multiplier,const ActivationLayerInfo & act_info,const Size2D & dilation)346 void NEDepthwiseConvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier,
347                                             const ActivationLayerInfo &act_info, const Size2D &dilation)
348 {
349     _depth_conv_func = get_depthwiseconvolution_function(input->info(), weights->info(), (biases != nullptr) ? biases->info() : nullptr, output->info(), conv_info, depth_multiplier, act_info, dilation);
350     switch(_depth_conv_func)
351     {
352         case DepthwiseConvolutionFunction::OPTIMIZED:
353             _func_optimized.configure(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
354             break;
355         case DepthwiseConvolutionFunction::GENERIC:
356             _func_generic.configure(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
357             break;
358         default:
359             ARM_COMPUTE_ERROR("Unsupported DepthwiseConvolutionFunction");
360     }
361 }
362 
validate(const ITensorInfo * input,const ITensorInfo * weights,const ITensorInfo * biases,const ITensorInfo * output,const PadStrideInfo & conv_info,unsigned int depth_multiplier,const ActivationLayerInfo & act_info,const Size2D & dilation)363 Status NEDepthwiseConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
364                                              unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
365 {
366     DepthwiseConvolutionFunction depth_conv_func = get_depthwiseconvolution_function(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
367     switch(depth_conv_func)
368     {
369         case DepthwiseConvolutionFunction::OPTIMIZED:
370             return NEDepthwiseConvolutionLayerOptimizedInternal::validate(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
371             break;
372         case DepthwiseConvolutionFunction::GENERIC:
373             return NEDepthwiseConvolutionLayerGeneric::validate(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
374             break;
375         default:
376             ARM_COMPUTE_ERROR("Unsupported DepthwiseConvolutionFunction");
377     }
378 }
379 
get_depthwiseconvolution_function(const ITensorInfo * input,const ITensorInfo * weights,const ITensorInfo * biases,const ITensorInfo * output,const PadStrideInfo & conv_info,unsigned int depth_multiplier,ActivationLayerInfo act_info,const Size2D & dilation)380 DepthwiseConvolutionFunction NEDepthwiseConvolutionLayer::get_depthwiseconvolution_function(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
381                                                                                             const PadStrideInfo &conv_info,
382                                                                                             unsigned int depth_multiplier, ActivationLayerInfo act_info, const Size2D &dilation)
383 {
384     if(bool(NEDepthwiseConvolutionLayerOptimizedInternal::validate(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation)))
385     {
386         return DepthwiseConvolutionFunction::OPTIMIZED;
387     }
388     else
389     {
390         return DepthwiseConvolutionFunction::GENERIC;
391     }
392 }
393 
run()394 void NEDepthwiseConvolutionLayer::run()
395 {
396     switch(_depth_conv_func)
397     {
398         case DepthwiseConvolutionFunction::OPTIMIZED:
399             _func_optimized.run();
400             break;
401         case DepthwiseConvolutionFunction::GENERIC:
402             _func_generic.run();
403             break;
404         default:
405             ARM_COMPUTE_ERROR("DepthwiseConvolutionFunction not properly configured");
406     }
407 }
408 
prepare()409 void NEDepthwiseConvolutionLayer::prepare()
410 {
411     switch(_depth_conv_func)
412     {
413         case DepthwiseConvolutionFunction::OPTIMIZED:
414             _func_optimized.prepare();
415             break;
416         case DepthwiseConvolutionFunction::GENERIC:
417             _func_generic.prepare();
418             break;
419         default:
420             ARM_COMPUTE_ERROR("DepthwiseConvolutionFunction not properly configured");
421     }
422 }
423 } // namespace arm_compute
424