1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5
6 #include "NeonDepthwiseConvolutionWorkload.hpp"
7
8 #include "NeonWorkloadUtils.hpp"
9
10 #include <armnnUtils/DataLayoutIndexed.hpp>
11
12 #include <aclCommon/ArmComputeTensorUtils.hpp>
13 #include <aclCommon/ArmComputeUtils.hpp>
14
15 #include <neon/NeonLayerSupport.hpp>
16
17 #include <backendsCommon/CpuTensorHandle.hpp>
18 #include <backendsCommon/WorkloadUtils.hpp>
19
20 #include <arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h>
21
22 using namespace armnnUtils;
23
24 namespace armnn
25 {
26
27 using namespace armcomputetensorutils;
28
NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo & input,const TensorInfo & output,const DepthwiseConvolution2dDescriptor & descriptor,const TensorInfo & weights,const Optional<TensorInfo> & biases,const ActivationDescriptor * activationDescriptor)29 arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& input,
30 const TensorInfo& output,
31 const DepthwiseConvolution2dDescriptor& descriptor,
32 const TensorInfo& weights,
33 const Optional<TensorInfo>& biases,
34 const ActivationDescriptor* activationDescriptor)
35 {
36 const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
37 const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
38
39 // ArmNN's weight format is [ M, I, H, W ]
40 const unsigned int aclDepthMultiplier = weights.GetShape()[0];
41
42 // Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either
43 // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library
44 TensorInfo weightsPermuted = ConvertWeightTensorInfoFromArmnnToAcl(weights, descriptor.m_DataLayout);
45
46 // Convert the weights into the compute library format
47 const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout);
48
49 arm_compute::TensorInfo aclBiasesInfo;
50 arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr;
51
52 if (descriptor.m_BiasEnabled)
53 {
54 ARMNN_ASSERT(biases.has_value());
55
56 aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
57 optionalAclBiasesInfo = &aclBiasesInfo;
58 }
59
60 arm_compute::PadStrideInfo aclPadStrideInfo = BuildArmComputePadStrideInfo(descriptor);
61 const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(
62 descriptor.m_DilationX,descriptor.m_DilationY);
63
64 const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
65 activationDescriptor);
66
67 return arm_compute::NEDepthwiseConvolutionLayer::validate(&aclInputInfo,
68 &aclWeightsInfo,
69 optionalAclBiasesInfo,
70 &aclOutputInfo,
71 aclPadStrideInfo,
72 aclDepthMultiplier,
73 activationInfo,
74 aclDilationInfo);
75 }
76
NeonDepthwiseConvolutionWorkload(const DepthwiseConvolution2dQueueDescriptor & descriptor,const WorkloadInfo & info)77 NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload(
78 const DepthwiseConvolution2dQueueDescriptor& descriptor,
79 const WorkloadInfo& info)
80 : BaseWorkload<DepthwiseConvolution2dQueueDescriptor>(descriptor, info)
81 {
82 // ArmNN's weight format is [ M, I, H, W ]
83 auto& weightInfo = m_Data.m_Weight->GetTensorInfo();
84
85 // Allocate a buffer for the swizzling of the weight tensor
86 std::unique_ptr<unsigned char[]> permuteBuffer(new unsigned char[m_Data.m_Weight->GetTensorInfo().GetNumBytes()]);
87
88 // Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either
89 // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library
90 ConstTensor weightPermuted = ConvertWeightTensorFromArmnnToAcl(m_Data.m_Weight,
91 m_Data.m_Parameters.m_DataLayout,
92 permuteBuffer.get());
93
94 // Convert the weights into the compute library format
95 m_KernelTensor = std::make_unique<arm_compute::Tensor>();
96 BuildArmComputeTensor(*m_KernelTensor, weightPermuted.GetInfo(), m_Data.m_Parameters.m_DataLayout);
97
98 if (m_Data.m_Parameters.m_BiasEnabled)
99 {
100 m_BiasTensor = std::make_unique<arm_compute::Tensor>();
101 BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo(), m_Data.m_Parameters.m_DataLayout);
102 }
103
104 const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(
105 m_Data.m_Parameters.m_DilationX, m_Data.m_Parameters.m_DilationY);
106
107 m_Data.ValidateInputsOutputs("NeonDepthwiseConvolutionWorkload", 1, 1);
108
109 IAclTensorHandle* inputTensorHandle = static_cast<IAclTensorHandle*>(m_Data.m_Inputs[0]);
110 IAclTensorHandle* outputTensorHandle = static_cast<IAclTensorHandle*>(m_Data.m_Outputs[0]);
111
112 arm_compute::ITensor& input = inputTensorHandle->GetTensor();
113 arm_compute::ITensor& output = outputTensorHandle->GetTensor();
114
115 arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout);
116 input.info()->set_data_layout(aclDataLayout);
117 output.info()->set_data_layout(aclDataLayout);
118
119 // Get the depth multiplier
120 const unsigned int depthMultiplier = weightInfo.GetShape()[0];
121
122 arm_compute::PadStrideInfo padStrideInfo = BuildArmComputePadStrideInfo(m_Data.m_Parameters);
123
124 const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
125
126 m_pDepthwiseConvolutionLayer = std::make_unique<arm_compute::NEDepthwiseConvolutionLayer>();
127 static_cast<arm_compute::NEDepthwiseConvolutionLayer*>(
128 m_pDepthwiseConvolutionLayer.get())->configure(&input,
129 m_KernelTensor.get(),
130 m_BiasTensor.get(),
131 &output,
132 padStrideInfo,
133 depthMultiplier,
134 activationInfo,
135 aclDilationInfo);
136
137 ARMNN_ASSERT(m_pDepthwiseConvolutionLayer);
138
139 ScopedCpuTensorHandle weightsPermutedHandle(weightPermuted);
140 InitializeArmComputeTensorData(*m_KernelTensor, &weightsPermutedHandle);
141
142 if (m_Data.m_Parameters.m_BiasEnabled)
143 {
144 InitializeArmComputeTensorData(*m_BiasTensor, m_Data.m_Bias);
145 }
146
147 m_pDepthwiseConvolutionLayer->prepare();
148 FreeUnusedTensors();
149 }
150
// Runs the pre-configured ACL depthwise convolution on the bound input/output tensors.
// The profiling event scopes the whole call; the layer must have been configured in the ctor.
void NeonDepthwiseConvolutionWorkload::Execute() const
{
    ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonDepthwiseConvolutionWorkload_Execute");
    ARMNN_ASSERT(m_pDepthwiseConvolutionLayer);

    m_pDepthwiseConvolutionLayer->run();
}
158
// Releases the staging kernel/bias tensors once the configured ACL layer no longer
// references their backing memory (called after prepare() in the ctor).
void NeonDepthwiseConvolutionWorkload::FreeUnusedTensors()
{
    FreeTensorIfUnused(m_KernelTensor);
    FreeTensorIfUnused(m_BiasTensor);
}
164
165 } //namespace armnn
166