1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5
6 #include "NeonConcatWorkload.hpp"
7
8 #include "NeonWorkloadUtils.hpp"
9
10 #include <aclCommon/ArmComputeTensorUtils.hpp>
11 #include <armnn/utility/PolymorphicDowncast.hpp>
12 #include <backendsCommon/CpuTensorHandle.hpp>
13 #include <neon/NeonTensorHandle.hpp>
14
15 namespace armnn
16 {
17 using namespace armcomputetensorutils;
18
19 namespace
20 {
CalcAxis(const armnn::OriginsDescriptor & desc)21 size_t CalcAxis(const armnn::OriginsDescriptor& desc)
22 {
23 return (desc.GetNumDimensions() - desc.GetConcatAxis()) - 1;
24 }
25 } //namespace
26
NeonConcatWorkloadValidate(const std::vector<const TensorInfo * > & inputs,const TensorInfo & output,const OriginsDescriptor & descriptor)27 arm_compute::Status NeonConcatWorkloadValidate(const std::vector<const TensorInfo*>& inputs,
28 const TensorInfo& output,
29 const OriginsDescriptor& descriptor)
30
31 {
32 std::vector<arm_compute::TensorInfo> aclInputs;
33 for (const TensorInfo* input : inputs)
34 {
35 arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(*input, armnn::DataLayout::NCHW);
36 aclInputs.emplace_back(aclInputInfo);
37 }
38 const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
39 std::vector<const arm_compute::ITensorInfo*> aclInputPtrs;
40 for (arm_compute::ITensorInfo& input : aclInputs)
41 {
42 aclInputPtrs.emplace_back(&input);
43 }
44
45 size_t aclAxis = CalcAxis(descriptor);
46 return arm_compute::NEConcatenateLayer::validate(aclInputPtrs, &aclOutputInfo, aclAxis);
47 }
48
NeonConcatWorkload(const ConcatQueueDescriptor & descriptor,const WorkloadInfo & info)49 NeonConcatWorkload::NeonConcatWorkload(
50 const ConcatQueueDescriptor& descriptor, const WorkloadInfo& info)
51 : BaseWorkload<ConcatQueueDescriptor>(descriptor, info)
52 {
53 bool allInputsAreSubtensors = true;
54
55 // Check that all inputs are sub-tensors
56 for (auto input : descriptor.m_Inputs)
57 {
58 if (!input->GetParent())
59 {
60 // Non sub-tensor input found so we need to execute the concat function
61 allInputsAreSubtensors = false;
62 break;
63 }
64 }
65
66 if (allInputsAreSubtensors)
67 {
68 // Can skip configuring the concat function since it's not executed
69 return;
70 }
71
72 std::vector<const arm_compute::ITensor *> aclInputs;
73 for (auto input : m_Data.m_Inputs)
74 {
75 arm_compute::ITensor& aclInput = armnn::PolymorphicPointerDowncast<IAclTensorHandle>(input)->GetTensor();
76 aclInputs.emplace_back(&aclInput);
77 }
78 arm_compute::ITensor& output = armnn::PolymorphicPointerDowncast<IAclTensorHandle>(
79 m_Data.m_Outputs[0])->GetTensor();
80
81 // Create the layer function
82 m_Layer.reset(new arm_compute::NEConcatenateLayer());
83
84 // Configure input and output tensors
85 size_t aclAxis = CalcAxis(descriptor.m_Parameters);
86 m_Layer->configure(aclInputs, &output, aclAxis);
87
88 // Prepare
89 m_Layer->prepare();
90 }
91
Execute() const92 void NeonConcatWorkload::Execute() const
93 {
94 if (m_Layer)
95 {
96 ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConcatWorkload_Execute");
97 m_Layer->run();
98 }
99 }
100
101 } //namespace armnn
102
103