• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
6 #include "NeonDepthwiseConvolutionWorkload.hpp"
7 
8 #include "NeonWorkloadUtils.hpp"
9 
10 #include <armnnUtils/DataLayoutIndexed.hpp>
11 
12 #include <aclCommon/ArmComputeTensorUtils.hpp>
13 #include <aclCommon/ArmComputeUtils.hpp>
14 
15 #include <neon/NeonLayerSupport.hpp>
16 
17 #include <backendsCommon/CpuTensorHandle.hpp>
18 #include <backendsCommon/WorkloadUtils.hpp>
19 
20 #include <arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h>
21 
22 using namespace armnnUtils;
23 
24 namespace armnn
25 {
26 
27 using namespace armcomputetensorutils;
28 
NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo & input,const TensorInfo & output,const DepthwiseConvolution2dDescriptor & descriptor,const TensorInfo & weights,const Optional<TensorInfo> & biases,const ActivationDescriptor * activationDescriptor)29 arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& input,
30                                                              const TensorInfo& output,
31                                                              const DepthwiseConvolution2dDescriptor& descriptor,
32                                                              const TensorInfo& weights,
33                                                              const Optional<TensorInfo>& biases,
34                                                              const ActivationDescriptor* activationDescriptor)
35 {
36     const arm_compute::TensorInfo aclInputInfo  = BuildArmComputeTensorInfo(input,  descriptor.m_DataLayout);
37     const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
38 
39     // ArmNN's weight format is [ M, I, H, W ]
40     const unsigned int aclDepthMultiplier = weights.GetShape()[0];
41 
42     // Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either
43     // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library
44     TensorInfo weightsPermuted = ConvertWeightTensorInfoFromArmnnToAcl(weights, descriptor.m_DataLayout);
45 
46     // Convert the weights into the compute library format
47     const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout);
48 
49     arm_compute::TensorInfo aclBiasesInfo;
50     arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr;
51 
52     if (descriptor.m_BiasEnabled)
53     {
54         ARMNN_ASSERT(biases.has_value());
55 
56         aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
57         optionalAclBiasesInfo = &aclBiasesInfo;
58     }
59 
60     arm_compute::PadStrideInfo aclPadStrideInfo = BuildArmComputePadStrideInfo(descriptor);
61     const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(
62             descriptor.m_DilationX,descriptor.m_DilationY);
63 
64     const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
65             activationDescriptor);
66 
67     return arm_compute::NEDepthwiseConvolutionLayer::validate(&aclInputInfo,
68                                                               &aclWeightsInfo,
69                                                               optionalAclBiasesInfo,
70                                                               &aclOutputInfo,
71                                                               aclPadStrideInfo,
72                                                               aclDepthMultiplier,
73                                                               activationInfo,
74                                                               aclDilationInfo);
75 }
76 
NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload(
    const DepthwiseConvolution2dQueueDescriptor& descriptor,
    const WorkloadInfo& info)
    : BaseWorkload<DepthwiseConvolution2dQueueDescriptor>(descriptor, info)
{
    // ArmNN's weight format is [ M, I, H, W ] (does NOT depend on the data layout).
    auto& weightInfo = m_Data.m_Weight->GetTensorInfo();

    // Scratch buffer to hold the swizzled copy of the weight tensor; it only needs
    // to live until the permuted data has been copied into m_KernelTensor below.
    std::unique_ptr<unsigned char[]> permuteBuffer(new unsigned char[m_Data.m_Weight->GetTensorInfo().GetNumBytes()]);

    // Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either
    // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library
    ConstTensor weightPermuted = ConvertWeightTensorFromArmnnToAcl(m_Data.m_Weight,
                                                                   m_Data.m_Parameters.m_DataLayout,
                                                                   permuteBuffer.get());

    // Describe the permuted weights to the compute library (no data copied yet --
    // the actual payload is uploaded via InitializeArmComputeTensorData further down).
    m_KernelTensor = std::make_unique<arm_compute::Tensor>();
    BuildArmComputeTensor(*m_KernelTensor, weightPermuted.GetInfo(), m_Data.m_Parameters.m_DataLayout);

    // Biases are optional; only create the ACL tensor when the descriptor enables them.
    if (m_Data.m_Parameters.m_BiasEnabled)
    {
        m_BiasTensor = std::make_unique<arm_compute::Tensor>();
        BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo(), m_Data.m_Parameters.m_DataLayout);
    }

    const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(
                m_Data.m_Parameters.m_DilationX, m_Data.m_Parameters.m_DilationY);

    // Depthwise convolution takes exactly one input and produces exactly one output.
    m_Data.ValidateInputsOutputs("NeonDepthwiseConvolutionWorkload", 1, 1);

    IAclTensorHandle* inputTensorHandle  = static_cast<IAclTensorHandle*>(m_Data.m_Inputs[0]);
    IAclTensorHandle* outputTensorHandle = static_cast<IAclTensorHandle*>(m_Data.m_Outputs[0]);

    arm_compute::ITensor& input  = inputTensorHandle->GetTensor();
    arm_compute::ITensor& output = outputTensorHandle->GetTensor();

    // Propagate ArmNN's data layout onto the ACL tensors before configuring the function.
    arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout);
    input.info()->set_data_layout(aclDataLayout);
    output.info()->set_data_layout(aclDataLayout);

    // Get the depth multiplier: the leading dimension M of ArmNN's [ M, I, H, W ] weight shape.
    const unsigned int depthMultiplier = weightInfo.GetShape()[0];

    arm_compute::PadStrideInfo padStrideInfo = BuildArmComputePadStrideInfo(m_Data.m_Parameters);

    // A fused activation may be attached to the queue descriptor's additional info.
    const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);

    // Configure the ACL function. The member holds the function through a base-class
    // pointer, hence the cast to the concrete layer type to reach configure().
    m_pDepthwiseConvolutionLayer = std::make_unique<arm_compute::NEDepthwiseConvolutionLayer>();
    static_cast<arm_compute::NEDepthwiseConvolutionLayer*>(
        m_pDepthwiseConvolutionLayer.get())->configure(&input,
                                                       m_KernelTensor.get(),
                                                       m_BiasTensor.get(),
                                                       &output,
                                                       padStrideInfo,
                                                       depthMultiplier,
                                                       activationInfo,
                                                       aclDilationInfo);

    ARMNN_ASSERT(m_pDepthwiseConvolutionLayer);

    // Upload the permuted weight (and bias) data into the ACL tensors, after configure()
    // and before prepare(), so prepare() can consume the constant data.
    ScopedCpuTensorHandle weightsPermutedHandle(weightPermuted);
    InitializeArmComputeTensorData(*m_KernelTensor, &weightsPermutedHandle);

    if (m_Data.m_Parameters.m_BiasEnabled)
    {
        InitializeArmComputeTensorData(*m_BiasTensor, m_Data.m_Bias);
    }

    // prepare() lets the function pre-process constant tensors; afterwards any staging
    // tensors the function no longer references can be released.
    m_pDepthwiseConvolutionLayer->prepare();
    FreeUnusedTensors();
}
150 
// Runs the configured ACL depthwise convolution function; all setup (configure/prepare)
// was completed in the constructor.
void NeonDepthwiseConvolutionWorkload::Execute() const
{
    ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonDepthwiseConvolutionWorkload_Execute");
    ARMNN_ASSERT(m_pDepthwiseConvolutionLayer);

    m_pDepthwiseConvolutionLayer->run();
}
158 
FreeUnusedTensors()159 void NeonDepthwiseConvolutionWorkload::FreeUnusedTensors()
160 {
161     FreeTensorIfUnused(m_KernelTensor);
162     FreeTensorIfUnused(m_BiasTensor);
163 }
164 
165 } //namespace armnn
166