/*
 * Copyright (c) 2017-2020 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h"

#include "arm_compute/core/utils/misc/InfoHelpers.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h"
#include "support/MemorySupport.h"

using namespace arm_compute::misc;
using namespace arm_compute::misc::shape_calculator;
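
// This file implements the NEON depthwise convolution runtime function as two
// internal paths: "OptimizedInternal" forwards to
// NEDepthwiseConvolutionAssemblyDispatch (hand-tuned kernels operating on NHWC,
// with NCHW handled by permutations), while "Generic" schedules the portable
// NEDepthwiseConvolutionLayerNativeKernel. A minimal usage sketch, assuming
// tensors src, weights, bias and dst are already initialised and allocated by
// the caller (identifiers below are illustrative only, not part of this file):
//
//   NEDepthwiseConvolutionLayer dwc;
//   dwc.configure(&src, &weights, &bias, &dst,
//                 PadStrideInfo(1, 1, 1, 1), // stride_x, stride_y, pad_x, pad_y
//                 1 /* depth_multiplier */, ActivationLayerInfo(), Size2D(1U, 1U));
//   dwc.run(); // the first run also performs any one-off weight preparation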

namespace arm_compute
{
namespace
{
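// Shared validator for the optimized (assembly) path: checks data types, that
// the dilated kernel fits inside the padded input, and the bias shape, then
// defers the final decision to NEDepthwiseConvolutionAssemblyDispatch. With
// kernel size k and dilation d the effective extent is k + (k - 1) * (d - 1);
// e.g. k = 3, d = 2 covers 5 input elements.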
Status validate_arguments_optimized(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                                    unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
    if(!is_data_type_quantized_per_channel(weights->data_type()))
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
    }
    ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() == DataLayout::UNKNOWN);
    ARM_COMPUTE_RETURN_ERROR_ON(dilation.x() < 1 || dilation.y() < 1);
    const size_t idx_w = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH);
    const size_t idx_h = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT);
    ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) + (weights->dimension(idx_w) - 1) * (dilation.x() - 1) > input->dimension(idx_w) + conv_info.pad_left() + conv_info.pad_right());
    ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_h) + (weights->dimension(idx_h) - 1) * (dilation.y() - 1) > input->dimension(idx_h) + conv_info.pad_top() + conv_info.pad_bottom());

    if(biases != nullptr)
    {
        const unsigned int channel_idx = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL);
        ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
        ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != weights->dimension(channel_idx));
    }

    ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseConvolutionAssemblyDispatch::validate(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation));

    // Validate Activation Layer
    if(act_info.enabled())
    {
        ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(output, nullptr, act_info));
    }
    return Status{};
}
} // namespace

NEDepthwiseConvolutionLayer::~NEDepthwiseConvolutionLayer() = default;

NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::NEDepthwiseConvolutionLayerOptimizedInternal(std::shared_ptr<IMemoryManager> memory_manager)
    : _memory_group(memory_manager), _dwc_optimized_func(memory_manager), _permute_input(), _permute_weights(), _permute_output(), _activationlayer_function(), _accumulator(), _permuted_input(),
      _permuted_weights(), _permuted_output(), _original_weights(nullptr), _has_bias(false), _is_quantized(false), _is_nchw(true), _permute(false), _is_activationlayer_enabled(false), _is_prepared(false)
{
}

void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::configure(ITensor *input,
                                                                                          const ITensor             *weights,
                                                                                          const ITensor             *biases,
                                                                                          ITensor *output, const PadStrideInfo &conv_info,
                                                                                          unsigned int               depth_multiplier,
                                                                                          const ActivationLayerInfo &act_info,
                                                                                          const Size2D              &dilation)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
    // Perform validation step
    ARM_COMPUTE_ERROR_THROW_ON(NEDepthwiseConvolutionLayerOptimizedInternal::validate(input->info(), weights->info(), (biases == nullptr) ? nullptr : biases->info(),
                                                                                      output->info(), conv_info, depth_multiplier, act_info, dilation));

    _original_weights           = weights;
    _is_quantized               = is_data_type_quantized_asymmetric(input->info()->data_type());
    _has_bias                   = biases != nullptr;
    _is_nchw                    = input->info()->data_layout() == DataLayout::NCHW;
    _permute                    = _is_nchw;
    _is_prepared                = false;
    _is_activationlayer_enabled = act_info.enabled();

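    // Note: the assembly path is expected to fuse ReLU and ReLU6 (checked via
    // the is_relu/is_relu6 helpers below); any other activation runs afterwards
    // as a standalone NEActivationLayer.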
    // Configure pipeline
    ActivationLayerInfo act_info_to_use = ActivationLayerInfo();
    const bool          is_relu         = arm_compute::utils::info_helpers::is_relu(act_info);
    const bool          is_relu6        = arm_compute::utils::info_helpers::is_relu6(act_info);
    _is_activationlayer_enabled         = act_info.enabled() && !(is_relu || is_relu6);
    if(!_is_activationlayer_enabled)
    {
        act_info_to_use = act_info;
    }

    if(_is_nchw)
    {
        _memory_group.manage(&_permuted_input);
        _memory_group.manage(&_permuted_output);

        // Configure the function to transform the input tensor from NCHW -> NHWC
        _permute_input.configure(input, &_permuted_input, PermutationVector(2U, 0U, 1U));
        _permuted_input.info()->set_data_layout(DataLayout::NHWC);

        // Configure the function to transform the weights tensor from IHW -> HWI
        _permute_weights.configure(weights, &_permuted_weights, PermutationVector(2U, 0U, 1U));
        _permuted_weights.info()->set_data_layout(DataLayout::NHWC);

        _permuted_output.info()->set_data_layout(DataLayout::NHWC);
        _permuted_output.info()->set_quantization_info(output->info()->quantization_info());

        // Configure optimized depthwise
        _dwc_optimized_func.configure(&_permuted_input, &_permuted_weights, biases, &_permuted_output, conv_info, depth_multiplier, act_info_to_use, dilation);

        // Configure the function to transform the convolved output back to ACL's native ordering format NCHW
        _permuted_output.info()->set_data_layout(DataLayout::NHWC);
        _permute_output.configure(&_permuted_output, output, PermutationVector(1U, 2U, 0U));

        // Allocate tensors
        _permuted_input.allocator()->allocate();
        _permuted_output.allocator()->allocate();
    }
    else
    {
        _dwc_optimized_func.configure(input, weights, biases, output, conv_info, depth_multiplier, act_info_to_use, dilation);
    }

    // Configure activation
    if(_is_activationlayer_enabled)
    {
        _activationlayer_function.configure(output, nullptr, act_info);
    }
}

Status NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::validate(const ITensorInfo         *input,
                                                                                           const ITensorInfo         *weights,
                                                                                           const ITensorInfo         *biases,
                                                                                           const ITensorInfo         *output,
                                                                                           const PadStrideInfo       &conv_info,
                                                                                           unsigned int               depth_multiplier,
                                                                                           const ActivationLayerInfo &act_info,
                                                                                           const Size2D              &dilation)
{
    return validate_arguments_optimized(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
}

void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::run()
{
    prepare();

    MemoryGroupResourceScope scope_mg(_memory_group);

    // Permute input
    if(_permute)
    {
        _permute_input.run();
    }

    // Run assembly function
    _dwc_optimized_func.run();

    // Permute output
    if(_is_nchw)
    {
        _permute_output.run();
    }

    // Run activation
    if(_is_activationlayer_enabled)
    {
        _activationlayer_function.run();
    }
}

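// prepare() performs the one-off work: permute the weights once (NCHW case),
// let the assembly dispatch prepare (typically repack) them into its own
// internal format, then free the intermediate permuted buffer if nothing still
// references it. The original weights are marked unused so the caller may
// release that memory.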
void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::prepare()
{
    if(!_is_prepared)
    {
        // Permute weights
        if(_permute)
        {
            _permuted_weights.allocator()->allocate();
            _permute_weights.run();
            _original_weights->mark_as_unused();
        }

        // Prepare optimized function
        _dwc_optimized_func.prepare();
        if(!_permuted_weights.is_used())
        {
            _permuted_weights.allocator()->free();
        }

        _is_prepared = true;
    }
}

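// Generic fallback path: used whenever the assembly dispatch rejects the
// configuration. It runs the portable native kernel and, for NCHW inputs,
// brackets it with NCHW <-> NHWC permutations.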
NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::NEDepthwiseConvolutionLayerGeneric()
    : _depthwise_conv_kernel(), _permute_input(), _permute_weights(), _permute_output(), _activationlayer_function(), _permuted_input(), _permuted_weights(), _permuted_output(), _is_prepared(false),
      _is_nchw(false), _is_activationlayer_enabled(false), _original_weights(nullptr)
{
}

void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
                                                                                unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
    ARM_COMPUTE_ERROR_THROW_ON(NEDepthwiseConvolutionLayer::validate(input->info(), weights->info(), (biases == nullptr) ? nullptr : biases->info(),
                                                                     output->info(), conv_info, depth_multiplier, act_info, dilation));

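    // In NHWC no weight permutation is required, so there is nothing left to
    // prepare; NCHW defers the one-off weight permutation to prepare().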
    _is_nchw     = input->info()->data_layout() == DataLayout::NCHW;
    _is_prepared = !_is_nchw;

    ITensor       *input_to_use   = input;
    const ITensor *weights_to_use = weights;
    ITensor       *output_to_use  = output;
    if(_is_nchw)
    {
        _permute_input.configure(input, &_permuted_input, PermutationVector(2U, 0U, 1U));
        _permuted_input.info()->set_data_layout(DataLayout::NHWC);
        input_to_use = &_permuted_input;

        _permute_weights.configure(weights, &_permuted_weights, PermutationVector(2U, 0U, 1U));
        _permuted_weights.info()->set_data_layout(DataLayout::NHWC);
        weights_to_use = &_permuted_weights;

        _permuted_output.allocator()->init(output->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(TensorShape()));
        output_to_use = &_permuted_output;
    }
    _original_weights = weights_to_use;

    _depthwise_conv_kernel = arm_compute::support::cpp14::make_unique<NEDepthwiseConvolutionLayerNativeKernel>();
    _depthwise_conv_kernel->configure(input_to_use, weights_to_use, biases, output_to_use, conv_info, depth_multiplier, dilation);

    if(_is_nchw)
    {
        _permute_output.configure(&_permuted_output, output, PermutationVector(1U, 2U, 0U));
        _permuted_output.info()->set_data_layout(DataLayout::NHWC);

        _permuted_input.allocator()->allocate();
        _permuted_weights.allocator()->allocate();
        _permuted_output.allocator()->allocate();
    }

    // Configure Activation Layer
    _is_activationlayer_enabled = act_info.enabled();
    if(_is_activationlayer_enabled)
    {
        _activationlayer_function.configure(output, nullptr, act_info);
    }
}

Status NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
                                                                                 const PadStrideInfo &conv_info,
                                                                                 unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
    if(input->data_layout() == DataLayout::NCHW)
    {
        TensorShape permuted_input_shape   = input->tensor_shape();
        TensorShape permuted_weights_shape = weights->tensor_shape();
        TensorShape permuted_output_shape  = misc::shape_calculator::compute_depthwise_convolution_shape(*input, *weights, conv_info, depth_multiplier, dilation);
        permute(permuted_input_shape, PermutationVector(2U, 0U, 1U));
        permute(permuted_weights_shape, PermutationVector(2U, 0U, 1U));
        permute(permuted_output_shape, PermutationVector(2U, 0U, 1U));

        const TensorInfo permuted_input   = TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_input_shape).set_data_layout(DataLayout::NHWC));
        const TensorInfo permuted_weights = TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_weights_shape).set_data_layout(DataLayout::NHWC));
        const TensorInfo permuted_output  = TensorInfo(output->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_output_shape).set_data_layout(DataLayout::NCHW));

        ARM_COMPUTE_RETURN_ON_ERROR(NEPermute::validate(input, &permuted_input, PermutationVector(2U, 0U, 1U)));
        ARM_COMPUTE_RETURN_ON_ERROR(NEPermute::validate(weights, &permuted_weights, PermutationVector(2U, 0U, 1U)));
        ARM_COMPUTE_RETURN_ON_ERROR(NEPermute::validate(&permuted_output, output, PermutationVector(1U, 2U, 0U)));

        ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseConvolutionLayerNativeKernel::validate(&permuted_input, &permuted_weights, biases, &permuted_output, conv_info, depth_multiplier, dilation));
    }
    else
    {
        ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseConvolutionLayerNativeKernel::validate(input, weights, biases, output, conv_info, depth_multiplier, dilation));
    }

    // Validate Activation Layer
    if(act_info.enabled())
    {
        ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(output, nullptr, act_info));
    }

    return Status{};
}

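// run(): permute the input if needed, execute the native kernel (parallelised
// by the scheduler along the Y dimension of its window), permute the result
// back, then apply any standalone activation.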
void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::run()
{
    if(_is_nchw)
    {
        prepare();
        _permute_input.run();
    }

    NEScheduler::get().schedule(_depthwise_conv_kernel.get(), Window::DimY);

    if(_is_nchw)
    {
        _permute_output.run();
    }

    if(_is_activationlayer_enabled)
    {
        _activationlayer_function.run();
    }
}

void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::prepare()
{
    if(!_is_prepared)
    {
        ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());

        _permute_weights.run();
        _original_weights->mark_as_unused();
        _is_prepared = true;
    }
}

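// Public facade: picks between the two internal implementations once at
// configure time via get_depthwiseconvolution_function() and forwards
// configure()/run()/prepare() to the selected one.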
NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
    : _depth_conv_func(DepthwiseConvolutionFunction::GENERIC), _func_optimized(std::move(memory_manager)), _func_generic()
{
}

void NEDepthwiseConvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier,
                                            const ActivationLayerInfo &act_info, const Size2D &dilation)
{
    _depth_conv_func = get_depthwiseconvolution_function(input->info(), weights->info(), (biases != nullptr) ? biases->info() : nullptr, output->info(), conv_info, depth_multiplier, act_info, dilation);
    switch(_depth_conv_func)
    {
        case DepthwiseConvolutionFunction::OPTIMIZED:
            _func_optimized.configure(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
            break;
        case DepthwiseConvolutionFunction::GENERIC:
            _func_generic.configure(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
            break;
        default:
            ARM_COMPUTE_ERROR("Unsupported DepthwiseConvolutionFunction");
    }
}

Status NEDepthwiseConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                                             unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
{
    DepthwiseConvolutionFunction depth_conv_func = get_depthwiseconvolution_function(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
    switch(depth_conv_func)
    {
        case DepthwiseConvolutionFunction::OPTIMIZED:
            return NEDepthwiseConvolutionLayerOptimizedInternal::validate(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
            break;
        case DepthwiseConvolutionFunction::GENERIC:
            return NEDepthwiseConvolutionLayerGeneric::validate(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
            break;
        default:
            ARM_COMPUTE_ERROR("Unsupported DepthwiseConvolutionFunction");
    }
}

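// Selection rule: if the optimized (assembly) path validates for this exact
// configuration, use it; otherwise fall back to the generic native kernel.
// Status converts to true on success, hence the bool() test below.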
DepthwiseConvolutionFunction NEDepthwiseConvolutionLayer::get_depthwiseconvolution_function(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
                                                                                            const PadStrideInfo &conv_info,
                                                                                            unsigned int depth_multiplier, ActivationLayerInfo act_info, const Size2D &dilation)
{
    if(bool(NEDepthwiseConvolutionLayerOptimizedInternal::validate(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation)))
    {
        return DepthwiseConvolutionFunction::OPTIMIZED;
    }
    else
    {
        return DepthwiseConvolutionFunction::GENERIC;
    }
}

void NEDepthwiseConvolutionLayer::run()
{
    switch(_depth_conv_func)
    {
        case DepthwiseConvolutionFunction::OPTIMIZED:
            _func_optimized.run();
            break;
        case DepthwiseConvolutionFunction::GENERIC:
            _func_generic.run();
            break;
        default:
            ARM_COMPUTE_ERROR("DepthwiseConvolutionFunction not properly configured");
    }
}

void NEDepthwiseConvolutionLayer::prepare()
{
    switch(_depth_conv_func)
    {
        case DepthwiseConvolutionFunction::OPTIMIZED:
            _func_optimized.prepare();
            break;
        case DepthwiseConvolutionFunction::GENERIC:
            _func_generic.prepare();
            break;
        default:
            ARM_COMPUTE_ERROR("DepthwiseConvolutionFunction not properly configured");
    }
}
} // namespace arm_compute