//
// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "ConcatTestImpl.hpp"

#include <QuantizeHelper.hpp>
#include <ResolveType.hpp>

#include <armnnUtils/Permute.hpp>

#include <backendsCommon/test/TensorCopyUtils.hpp>
#include <backendsCommon/test/WorkloadTestUtils.hpp>

#include <test/TensorHelpers.hpp>

using namespace armnn;
using namespace armnnUtils;

//
// Helper functions and templates
//
OriginsDescriptor CreateDescriptorForConcat(
    const std::vector<TensorInfo> & inputTensorInfos,
    unsigned int concatDim)
{
    std::vector<TensorShape> shapes;
    shapes.reserve(inputTensorInfos.size());
    for (const TensorInfo& it: inputTensorInfos)
    {
        shapes.push_back(it.GetShape());
    }

    return CreateDescriptorForConcatenation(shapes.begin(), shapes.end(), concatDim);
}

//
// Concat is only supported for the N and C dimensions for NCHW and the innermost dimension.
// In case of <4 dimensions we need to make sure that the concat dimension is at least
// the 3rd slowest iterating one or the innermost dimension.
//
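// For example: rank-1 and rank-2 inputs always take the permute path below (they
// are first expanded to 3d), while rank-3 inputs only need it when concatenating
// along dim 1, which is neither the outermost nor the innermost dimension.
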
bool NeedPermuteForConcat(
    const std::vector<TensorInfo> & inputTensorInfos,
    unsigned int concatDim)
{
    // See note above. Additionally we expect the input shapes to have the
    // same number of dimensions.
    unsigned int nDimensions = 0;

    // Determine the number of dimensions as well as sanity check them
    // against test implementation issues.
    for (auto && tensorInfo : inputTensorInfos)
    {
        if (!nDimensions)
        {
            nDimensions = tensorInfo.GetShape().GetNumDimensions();
        }
        else
        {
            ARMNN_ASSERT_MSG(nDimensions == tensorInfo.GetShape().GetNumDimensions(),
                "Input shapes must have the same number of dimensions");
        }
    }

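    // A permute is needed when the inputs have fewer than 3 dimensions, or when they
    // are 3d and the concat axis is the middle one (concatDim == 1), i.e. neither the
    // slowest nor the fastest iterating dimension.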
    return (nDimensions < 3 || (nDimensions == 3 && (nDimensions-concatDim) < 3 && (nDimensions-concatDim) != 1));
}
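
// Expands a shape to rank 3 by prepending dummy dimensions of size 1,
// e.g. { 5 } becomes { 1, 1, 5 } and { 2, 3 } becomes { 1, 2, 3 }.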
TensorShape ExpandTensorShapeTo3dForPermute(const TensorShape & inputShape)
{
    unsigned int numDims = inputShape.GetNumDimensions();
    if (numDims >= 3)
    {
        // Nothing to do if the inputShape has at least 3 dimensions.
        return inputShape;
    }

    std::vector<unsigned int> newDims(size_t(3), 1u);
    unsigned int expandedBy = 3 - numDims;
    for (unsigned int i=0; i<numDims; ++i)
    {
        newDims[expandedBy+i] = inputShape[i];
    }
    return TensorShape(3u, &newDims[0]);
}
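
// Picks a forward/reverse permutation pair that moves the (expanded) concat axis to
// dimension 0, where concatenation is always supported, and resets concatDim
// accordingly. Mappings follow the armnn::PermutationVector convention used by
// armnnUtils::Permuted: entry i is the destination position of source dimension i,
// so e.g. the forward mapping {1, 2, 0} sends the old dim 2 (the concat axis) to
// dim 0, and the paired reverse mapping {2, 0, 1} undoes it.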
void Generate3dPermuteVectorForConcat(
    unsigned int numDimensions,
    unsigned int & concatDim,
    std::pair<PermutationVector, PermutationVector> & permutations)
{
    ARMNN_ASSERT_MSG(numDimensions <= 3,
       "Only dimensions 1,2 and 3 are supported by this helper");
    unsigned int expandedBy = 3 - numDimensions;
    unsigned int expandedConcatAxis = concatDim + expandedBy;

    if (expandedConcatAxis == 2)
    {
        concatDim = 0;
        PermutationVector forwardPermutation({1, 2, 0});
        PermutationVector reversePermutation({2, 0, 1});
        permutations = std::make_pair(forwardPermutation, reversePermutation);
    }
    else if (expandedConcatAxis == 1)
    {
        concatDim = 0;
        PermutationVector forwardPermutation({2, 0, 1});
        PermutationVector reversePermutation({1, 2, 0});
        permutations = std::make_pair(forwardPermutation, reversePermutation);
    }
    else
    {
        ARMNN_ASSERT(expandedConcatAxis == 0);
        concatDim = 0;
    }
}
template<typename T> void PermuteTensorData(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    const PermutationVector& mappings,
    TensorInfo & inputTensorInfo,
    const T * inputData,
    std::vector<T>& outputData)
{
    IgnoreUnused(memoryManager);
    ARMNN_ASSERT_MSG(inputData != nullptr, "inputData must not be null");
    if (inputData == nullptr)
    {
        // Nullptr is an error in the test. By returning without doing the permutation
        // I expect the caller to fail the test. It still makes sense to report this as
        // an assert for Debug builds.
        return;
    }

    TensorInfo outputTensorInfo = armnnUtils::Permuted(inputTensorInfo, mappings);
    std::unique_ptr<ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputTensorInfo);
    std::unique_ptr<ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);

    PermuteQueueDescriptor queueDescriptor;
    queueDescriptor.m_Parameters = PermuteDescriptor{mappings};
    WorkloadInfo workloadInfo;
    AddInputToWorkload(queueDescriptor, workloadInfo, inputTensorInfo, inputHandle.get());
    AddOutputToWorkload(queueDescriptor, workloadInfo, outputTensorInfo, outputHandle.get());

    std::unique_ptr<IWorkload> workload = workloadFactory.CreatePermute(queueDescriptor, workloadInfo);

    inputHandle->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle.get(), inputData);

    workload->PostAllocationConfigure();
    workload->Execute();

    outputData.resize(outputTensorInfo.GetNumElements());
    CopyDataFromITensorHandle(&outputData[0], outputHandle.get());
    inputTensorInfo = outputTensorInfo;
}

//
// Permutes the input tensors so we can do a supported concatenation.
// Tensors with fewer than 3 dimensions are treated as 3d by adding
// dummy 1 dimensions at the front. Finally, this function computes
// the output shape of the permuted, concatenated tensor.
//
template<typename T> void PermuteInputsForConcat(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    std::vector<TensorInfo> & inputTensorInfos,
    std::vector<T *> & inputData,
    std::vector<std::vector<T>> & inputDataStorage,
    PermutationVector & permuteVector,
    unsigned int & concatDim,
    TensorInfo & outputTensorInfo)
{
    IgnoreUnused(memoryManager);
    ARMNN_ASSERT_MSG(inputTensorInfos.size() > 1,
        "Expecting more than one tensor to be concatenated here");

    unsigned int numDims = 0;
    unsigned int nthInput = 0;
    const PermutationVector identity({0, 1, 2});

    std::pair<PermutationVector, PermutationVector> permutations =
        std::make_pair(identity, identity);

    inputDataStorage.resize(inputData.size());

    for (auto && tensorInfo : inputTensorInfos)
    {
        if (numDims == 0)
        {
            numDims = tensorInfo.GetShape().GetNumDimensions();
            Generate3dPermuteVectorForConcat(numDims, concatDim, permutations);

            // Store the reverse permutation.
            permuteVector = permutations.second;
            ARMNN_ASSERT_MSG(!permuteVector.IsEqual(identity),
                "Test logic error: no permutation is needed, so we shouldn't arrive here");
        }
        else
        {
            ARMNN_ASSERT_MSG(numDims == tensorInfo.GetShape().GetNumDimensions(),
                "All inputs must have the same number of dimensions");
        }

        TensorInfo newTensorInfo = tensorInfo;
        newTensorInfo.SetShape(ExpandTensorShapeTo3dForPermute(tensorInfo.GetShape()));

        PermuteTensorData<T>(workloadFactory,
                             memoryManager,
                             tensorHandleFactory,
                             permutations.first,
                             newTensorInfo,
                             inputData[nthInput],
                             inputDataStorage[nthInput]);

        inputData[nthInput] = inputDataStorage[nthInput].data();
        inputTensorInfos[nthInput] = newTensorInfo;

        ++nthInput;
    }

    outputTensorInfo.SetShape(
        armnnUtils::Permuted(
            ExpandTensorShapeTo3dForPermute(outputTensorInfo.GetShape()),
            permutations.first));
}

//
// This is the counterpart of PermuteInputsForConcat(...), which permutes back
// the output of the concatenation so we can check it against an expected
// output.
//
template <typename T> void PermuteOutputForConcat(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    const TensorInfo & tensorInfo,
    const PermutationVector & permuteVector,
    std::unique_ptr<ITensorHandle> && inputDataHandle,
    T * data)
{
    ARMNN_ASSERT_MSG(data != nullptr, "data must not be null");
    if (data == nullptr)
    {
        // Nullptr is an error in the test. By returning without doing the permutation
        // I expect the caller to fail the test. It still makes sense to report this as
        // an assert for Debug builds.
        return;
    }

    TensorInfo resultTensorInfo = tensorInfo;
    std::vector<T> inputData(tensorInfo.GetNumElements());
    std::vector<T> outputData;

    CopyDataFromITensorHandle(&inputData[0], inputDataHandle.get());

    PermuteTensorData<T>(workloadFactory,
                         memoryManager,
                         tensorHandleFactory,
                         permuteVector,
                         resultTensorInfo,
                         &inputData[0],
                         outputData);

    ::memcpy(data, &outputData[0], sizeof(T)*outputData.size());
}

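//
// Concatenates the given inputs along concatDim and writes the result to output.
// If the requested axis is not directly supported (see NeedPermuteForConcat above),
// the inputs are permuted, concatenated along a supported axis, and the result is
// permuted back. When useSubtensor is set and the backend supports sub-tensors, the
// inputs are created as views into the output tensor instead of separate tensors.
//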
template<typename T> void Concatenate(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    std::initializer_list<const TensorInfo> inputTensorInfosOrig,
    std::initializer_list<T *> inputsOrig,
    const TensorInfo& outputTensorInfoOrig,
    T * output,
    unsigned int concatDim,
    bool useSubtensor)
{
    ARMNN_ASSERT_MSG(output != nullptr, "output must not be null");
    if (output == nullptr)
    {
        // Nullptr is an error in the test. By returning without doing the concatenation
        // I expect the caller to fail the test. It still makes sense to report this as
        // an assert for Debug builds.
        return;
    }

    // Saves a copy of the parameters which we might need to change.
    std::vector<TensorInfo> inputTensorInfos(inputTensorInfosOrig.begin(), inputTensorInfosOrig.end());
    std::vector<T *> inputs = inputsOrig;
    TensorInfo outputTensorInfo = outputTensorInfoOrig;

    PermutationVector permuteVector{0, 1, 2};

    // Holds and automatically releases memory for the reshaped input data.
    std::vector<std::vector<T>> tmpInputDataStorage;

    const size_t inputCount = inputTensorInfos.size();

    bool needPermuteForConcat = NeedPermuteForConcat(inputTensorInfos, concatDim);

    if (needPermuteForConcat)
    {
        //
        // We need to permute the inputs, because concatenation along
        // the requested axis is not supported.
        //
        PermuteInputsForConcat<T>(workloadFactory,
                                  memoryManager,
                                  tensorHandleFactory,
                                  inputTensorInfos,
                                  inputs,
                                  tmpInputDataStorage,
                                  permuteVector,
                                  concatDim,
                                  outputTensorInfo);
    }

    WorkloadInfo workloadInfo;

    std::vector<std::unique_ptr<ITensorHandle>> inputHandles;
    inputHandles.reserve(inputCount);

    std::unique_ptr<ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);

    ConcatQueueDescriptor queueDescriptor;
    OriginsDescriptor viewsDescriptor = CreateDescriptorForConcat(inputTensorInfos, concatDim);
    queueDescriptor.m_Parameters = viewsDescriptor;

    if (useSubtensor)
    {
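        // Each view origin records where an input starts within the concatenated
        // output. With sub-tensor support the input handles below are created as
        // views into the output tensor at those origins, so the backend can write
        // each input directly into place; otherwise plain tensor handles are used.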
        queueDescriptor.m_ViewOrigins.reserve(viewsDescriptor.GetNumViews());
        for (unsigned int i = 0; i < viewsDescriptor.GetNumViews(); ++i)
        {
            queueDescriptor.m_ViewOrigins.emplace_back(std::vector<unsigned int>(viewsDescriptor.GetViewOrigin(i),
                viewsDescriptor.GetViewOrigin(i) + viewsDescriptor.GetNumDimensions()));
        }

        outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);

        const bool subTensorsSupported = workloadFactory.SupportsSubTensors();
        for (unsigned int i = 0; i < inputCount; ++i)
        {
            const TensorInfo& inputTensorInfo = inputTensorInfos[i];

            std::unique_ptr<ITensorHandle> inputHandle =
                subTensorsSupported ?
                    tensorHandleFactory.CreateSubTensorHandle(*outputHandle,
                                                              inputTensorInfo.GetShape(),
                                                              queueDescriptor.m_ViewOrigins[i].m_Origin.data()) :
                    tensorHandleFactory.CreateTensorHandle(inputTensorInfo);

            inputHandles.emplace_back(std::move(inputHandle));
        }
    }
    else
    {
        for (unsigned int i = 0; i < inputCount; ++i)
        {
            std::unique_ptr<ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputTensorInfos[i]);
            inputHandles.emplace_back(std::move(inputHandle));
        }
    }

    for (unsigned int i = 0; i < inputCount; ++i)
    {
        AddInputToWorkload(queueDescriptor, workloadInfo, inputTensorInfos[i], inputHandles[i].get());
    }

    AddOutputToWorkload(queueDescriptor, workloadInfo, outputTensorInfo, outputHandle.get());

    std::unique_ptr<IWorkload> workload = workloadFactory.CreateConcat(queueDescriptor, workloadInfo);

    for (auto& inputHandle : inputHandles)
    {
        inputHandle->Allocate();
    }

    outputHandle->Allocate();

    unsigned int nextInputId = 0;
    for (auto& inputHandle : inputHandles)
    {
        CopyDataToITensorHandle(inputHandle.get(), inputs[nextInputId]);
        ++nextInputId;
    }

    workload->PostAllocationConfigure();
    workload->Execute();

    if (needPermuteForConcat)
    {
        PermuteOutputForConcat<T>(workloadFactory,
                                  memoryManager,
                                  tensorHandleFactory,
                                  outputTensorInfo,
                                  permuteVector,
                                  std::move(outputHandle),
                                  output);
    }
    else
    {
        CopyDataFromITensorHandle(output, outputHandle.get());
    }
}

//
// Implementation templates
//

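// Each ConcatXdTestImpl helper below follows the same pattern: build quantized
// inputs, call Concatenate<T>() along the dimension under test, and compare the
// result against an expected tensor spelled out inline.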
template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
LayerTestResult<T, 1> Concat1dTestImpl(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    float qScale,
    int32_t qOffset)
{
    TensorInfo inputTensorInfo({ 3 }, ArmnnType, qScale, qOffset);

    auto input0 = MakeTensor<T, 1>(inputTensorInfo, QuantizedVector<T>({ 1.0f, 2.0f, 3.0f }, qScale, qOffset));
    auto input1 = MakeTensor<T, 1>(inputTensorInfo, QuantizedVector<T>({ 4.0f, 5.0f, 6.0f }, qScale, qOffset));
    auto input2 = MakeTensor<T, 1>(inputTensorInfo, QuantizedVector<T>({ 7.0f, 8.0f, 9.0f }, qScale, qOffset));

    TensorInfo outputTensorInfo({ 9 }, ArmnnType, qScale, qOffset);

    LayerTestResult<T, 1> result(outputTensorInfo);

    std::vector<T> output;
    output.resize(outputTensorInfo.GetNumElements());
    Concatenate<T>(workloadFactory, memoryManager, tensorHandleFactory,
                   { inputTensorInfo, inputTensorInfo, inputTensorInfo },
                   { input0.data(), input1.data(), input2.data() },
                   outputTensorInfo,
                   output.data(),
                   0,
                   true);

    result.output         = MakeTensor<T, 1>(outputTensorInfo, output);
    result.outputExpected = MakeTensor<T, 1>(outputTensorInfo, QuantizedVector<T>(
        {
            1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f
        },
        qScale, qOffset));

    return result;
}

template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
LayerTestResult<T, 2> Concat2dTestImpl(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    const TensorInfo& outputTensorInfo,
    unsigned int dimension,
    const float qScale,
    const int32_t qOffset)
{
    TensorInfo inputTensorInfo({ 2, 3 }, ArmnnType, qScale, qOffset);

    auto input0 = MakeTensor<T, 2>(inputTensorInfo, QuantizedVector<T>(
        {
            // Batch 0
            1.0f, 2.0f, 3.0f,

            // Batch 1
            10.0f, 11.0f, 12.0f,
        },
        qScale, qOffset));

    auto input1 = MakeTensor<T, 2>(inputTensorInfo, QuantizedVector<T>(
        {
            // Batch 0
            4.0f, 5.0f, 6.0f,

            // Batch 1
            13.0f, 14.0f, 15.0f,
        },
        qScale, qOffset));

    auto input2 = MakeTensor<T, 2>(inputTensorInfo, QuantizedVector<T>(
        {
            // Batch 0
            7.0f, 8.0f, 9.0f,

            // Batch 1
            16.0f, 17.0f, 18.0f,
        },
        qScale, qOffset));

    LayerTestResult<T, 2> result(outputTensorInfo);

    std::vector<T> output;
    output.resize(outputTensorInfo.GetNumElements());
    Concatenate<T>(workloadFactory, memoryManager, tensorHandleFactory,
                   { inputTensorInfo, inputTensorInfo, inputTensorInfo },
                   { input0.data(), input1.data(), input2.data() },
                   outputTensorInfo,
                   output.data(),
                   dimension,
                   true);

    result.output = MakeTensor<T, 2>(outputTensorInfo, output);
    return result;
}

template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
LayerTestResult<T, 2> Concat2dDim0TestImpl(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    float qScale,
    int32_t qOffset)
{
    TensorInfo outputTensorInfo({ 6, 3 }, ArmnnType, qScale, qOffset);

    LayerTestResult<T, 2> result = Concat2dTestImpl<ArmnnType>(
        workloadFactory, memoryManager, tensorHandleFactory, outputTensorInfo, 0, qScale, qOffset);

    result.outputExpected = MakeTensor<T, 2>(outputTensorInfo, QuantizedVector<T>(
        {
            // Batch 0
            1.0f, 2.0f, 3.0f,

            // Batch 1
            10.0f, 11.0f, 12.0f,

            // Batch 2
            4.0f, 5.0f, 6.0f,

            // Batch 3
            13.0f, 14.0f, 15.0f,

            // Batch 4
            7.0f, 8.0f, 9.0f,

            // Batch 5
            16.0f, 17.0f, 18.0f,
        },
        qScale, qOffset));

    return result;
}

template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
LayerTestResult<T, 2> Concat2dDim1TestImpl(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    float qScale,
    int32_t qOffset)
{
    TensorInfo outputTensorInfo({ 2, 9 }, ArmnnType, qScale, qOffset);

    LayerTestResult<T, 2> result = Concat2dTestImpl<ArmnnType>(
        workloadFactory, memoryManager, tensorHandleFactory, outputTensorInfo, 1, qScale, qOffset);

    result.outputExpected = MakeTensor<T, 2>(outputTensorInfo, QuantizedVector<T>(
        {
            // Batch 0
            1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,

            // Batch 1
            10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f
        },
        qScale, qOffset));

    return result;
}

template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
LayerTestResult<T, 2> Concat2dDim0DiffInputDimsTestImpl(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    float qScale,
    int32_t qOffset)
{
    TensorInfo input0TensorInfo({ 2, 3 }, ArmnnType, qScale, qOffset);
    auto input0 = MakeTensor<T, 2>(input0TensorInfo, QuantizedVector<T>(
        {
            // Batch 0
            1.0f, 2.0f, 3.0f,

            // Batch 1
            10.0f, 11.0f, 12.0f,
        },
        qScale, qOffset));

    TensorInfo input1TensorInfo({ 3, 3 }, ArmnnType, qScale, qOffset);
    auto input1 = MakeTensor<T, 2>(input1TensorInfo, QuantizedVector<T>(
        {
            // Batch 0
            4.0f, 5.0f, 6.0f,

            // Batch 1
            13.0f, 14.0f, 15.0f,

            // Batch 2
            7.0f, 8.0f, 9.0f,
        },
        qScale, qOffset));

    TensorInfo input2TensorInfo({ 1, 3 }, ArmnnType, qScale, qOffset);
    auto input2 = MakeTensor<T, 2>(input2TensorInfo, QuantizedVector<T>(
        {
            // Batch 0
            16.0f, 17.0f, 18.0f,
        },
        qScale, qOffset));

    TensorInfo outputTensorInfo({ 6, 3 }, ArmnnType, qScale, qOffset);
    LayerTestResult<T, 2> result(outputTensorInfo);

    std::vector<T> output;
    output.resize(outputTensorInfo.GetNumElements());
    Concatenate<T>(workloadFactory, memoryManager, tensorHandleFactory,
                   { input0TensorInfo, input1TensorInfo, input2TensorInfo },
                   { input0.data(), input1.data(), input2.data() },
                   outputTensorInfo,
                   output.data(),
                   0,
                   true);

    result.output = MakeTensor<T, 2>(outputTensorInfo, output);
    result.outputExpected = MakeTensor<T, 2>(outputTensorInfo, QuantizedVector<T>(
        {
            // Batch 0
            1.0f, 2.0f, 3.0f,

            // Batch 1
            10.0f, 11.0f, 12.0f,

            // Batch 2
            4.0f, 5.0f, 6.0f,

            // Batch 3
            13.0f, 14.0f, 15.0f,

            // Batch 4
            7.0f, 8.0f, 9.0f,

            // Batch 5
            16.0f, 17.0f, 18.0f,
        },
        qScale, qOffset));

    return result;
}

template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
LayerTestResult<T, 2> Concat2dDim1DiffInputDimsTestImpl(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    float qScale,
    int32_t qOffset)
{
    TensorInfo input0TensorInfo({ 2, 3 }, ArmnnType, qScale, qOffset);
    auto input0 = MakeTensor<T, 2>(input0TensorInfo, QuantizedVector<T>(
        {
            // Batch 0
            1.0f, 2.0f, 3.0f,

            // Batch 1
            10.0f, 11.0f, 12.0f,
        },
        qScale, qOffset));

    TensorInfo input1TensorInfo({ 2, 5 }, ArmnnType, qScale, qOffset);
    auto input1 = MakeTensor<T, 2>(input1TensorInfo, QuantizedVector<T>(
        {
            // Batch 0
            4.0f, 5.0f, 6.0f, 7.0f, 8.0f,

            // Batch 1
            13.0f, 14.0f, 15.0f, 16.0f, 17.0f,
        },
        qScale, qOffset));

    TensorInfo input2TensorInfo({ 2, 1 }, ArmnnType, qScale, qOffset);
    auto input2 = MakeTensor<T, 2>(input2TensorInfo, QuantizedVector<T>(
        {
            // Batch 0
            9.0f,

            // Batch 1
            18.0f
        },
        qScale, qOffset));

    TensorInfo outputTensorInfo({ 2, 9 }, ArmnnType, qScale, qOffset);
    LayerTestResult<T, 2> result(outputTensorInfo);

    std::vector<T> output;
    output.resize(outputTensorInfo.GetNumElements());
    Concatenate<T>(workloadFactory, memoryManager, tensorHandleFactory,
                   { input0TensorInfo, input1TensorInfo, input2TensorInfo },
                   { input0.data(), input1.data(), input2.data() },
                   outputTensorInfo,
                   output.data(),
                   1,
                   true);

    result.output = MakeTensor<T, 2>(outputTensorInfo, output);
    result.outputExpected = MakeTensor<T, 2>(outputTensorInfo, QuantizedVector<T>(
        {
            // Batch 0
            1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,

            // Batch 1
            10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f,
        },
        qScale, qOffset));

    return result;
}

template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
LayerTestResult<T, 3> Concat3dTestImpl(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    const TensorInfo& outputTensorInfo,
    unsigned int dimension,
    bool useSubtensor,
    float qScale,
    int32_t qOffset)
{
    TensorInfo inputTensorInfo({ 2, 3, 2 }, ArmnnType, qScale, qOffset);

    auto input0 = MakeTensor<T, 3>(inputTensorInfo, QuantizedVector<T>(
        {
            // Batch 0, Channel 0
            1.0f, 2.0f,

            // Batch 0, Channel 1
            3.0f, 4.0f,

            // Batch 0, Channel 2
            5.0f, 6.0f,

            // Batch 1, Channel 0
            19.0f, 20.0f,

            // Batch 1, Channel 1
            21.0f, 22.0f,

            // Batch 1, Channel 2
            23.0f, 24.0f
        },
        qScale, qOffset));

    auto input1 = MakeTensor<T, 3>(inputTensorInfo, QuantizedVector<T>(
        {
            // Batch 0, Channel 0
            7.0f, 8.0f,

            // Batch 0, Channel 1
            9.0f, 10.0f,

            // Batch 0, Channel 2
            11.0f, 12.0f,

            // Batch 1, Channel 0
            25.0f, 26.0f,

            // Batch 1, Channel 1
            27.0f, 28.0f,

            // Batch 1, Channel 2
            29.0f, 30.0f
        },
        qScale, qOffset));

    auto input2 = MakeTensor<T, 3>(inputTensorInfo, QuantizedVector<T>(
        {
            // Batch 0, Channel 0
            13.0f, 14.0f,

            // Batch 0, Channel 1
            15.0f, 16.0f,

            // Batch 0, Channel 2
            17.0f, 18.0f,

            // Batch 1, Channel 0
            31.0f, 32.0f,

            // Batch 1, Channel 1
            33.0f, 34.0f,

            // Batch 1, Channel 2
            35.0f, 36.0f
        },
        qScale, qOffset));

    LayerTestResult<T, 3> result(outputTensorInfo);

    std::vector<T> output;
    output.resize(outputTensorInfo.GetNumElements());
    Concatenate<T>(workloadFactory, memoryManager, tensorHandleFactory,
                   { inputTensorInfo, inputTensorInfo, inputTensorInfo },
                   { input0.data(), input1.data(), input2.data() },
                   outputTensorInfo,
                   output.data(),
                   dimension,
                   useSubtensor);

    result.output = MakeTensor<T, 3>(outputTensorInfo, output);
    return result;
}

template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
LayerTestResult<T, 3> Concat3dDim0TestImpl(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    float qScale,
    int32_t qOffset)
{
    TensorInfo outputTensorInfo({ 6, 3, 2 }, ArmnnType, qScale, qOffset);

    LayerTestResult<T, 3> result = Concat3dTestImpl<ArmnnType>(
        workloadFactory, memoryManager, tensorHandleFactory, outputTensorInfo, 0, true, qScale, qOffset);

    result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(
        {
            // Batch 0, Channel 0
            1.0f, 2.0f,

            // Batch 0, Channel 1
            3.0f, 4.0f,

            // Batch 0, Channel 2
            5.0f, 6.0f,

            // Batch 1, Channel 0
            19.0f, 20.0f,

            // Batch 1, Channel 1
            21.0f, 22.0f,

            // Batch 1, Channel 2
            23.0f, 24.0f,

            // Batch 2, Channel 0
            7.0f, 8.0f,

            // Batch 2, Channel 1
            9.0f, 10.0f,

            // Batch 2, Channel 2
            11.0f, 12.0f,

            // Batch 3, Channel 0
            25.0f, 26.0f,

            // Batch 3, Channel 1
            27.0f, 28.0f,

            // Batch 3, Channel 2
            29.0f, 30.0f,

            // Batch 4, Channel 0
            13.0f, 14.0f,

            // Batch 4, Channel 1
            15.0f, 16.0f,

            // Batch 4, Channel 2
            17.0f, 18.0f,

            // Batch 5, Channel 0
            31.0f, 32.0f,

            // Batch 5, Channel 1
            33.0f, 34.0f,

            // Batch 5, Channel 2
            35.0f, 36.0f
        },
        qScale, qOffset));

    return result;
}

template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
LayerTestResult<T, 3> Concat3dDim1TestImpl(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    float qScale,
    int32_t qOffset)
{
    TensorInfo outputTensorInfo({ 2, 9, 2 }, ArmnnType, qScale, qOffset);

    LayerTestResult<T, 3> result = Concat3dTestImpl<ArmnnType>(
        workloadFactory, memoryManager, tensorHandleFactory, outputTensorInfo, 1, true, qScale, qOffset);

    result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(
        {
            // Batch 0, Channel 0
            1.0f, 2.0f,

            // Batch 0, Channel 1
            3.0f, 4.0f,

            // Batch 0, Channel 2
            5.0f, 6.0f,

            // Batch 0, Channel 3
            7.0f, 8.0f,

            // Batch 0, Channel 4
            9.0f, 10.0f,

            // Batch 0, Channel 5
            11.0f, 12.0f,

            // Batch 0, Channel 6
            13.0f, 14.0f,

            // Batch 0, Channel 7
            15.0f, 16.0f,

            // Batch 0, Channel 8
            17.0f, 18.0f,

            // Batch 1, Channel 0
            19.0f, 20.0f,

            // Batch 1, Channel 1
            21.0f, 22.0f,

            // Batch 1, Channel 2
            23.0f, 24.0f,

            // Batch 1, Channel 3
            25.0f, 26.0f,

            // Batch 1, Channel 4
            27.0f, 28.0f,

            // Batch 1, Channel 5
            29.0f, 30.0f,

            // Batch 1, Channel 6
            31.0f, 32.0f,

            // Batch 1, Channel 7
            33.0f, 34.0f,

            // Batch 1, Channel 8
            35.0f, 36.0f
        },
        qScale, qOffset));

    return result;
}

template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
LayerTestResult<T, 3> Concat3dDim2TestImpl(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    bool useSubtensor,
    float qScale,
    int32_t qOffset)
{
    TensorInfo outputTensorInfo({ 2, 3, 6 }, ArmnnType, qScale, qOffset);

    LayerTestResult<T, 3> result = Concat3dTestImpl<ArmnnType>(
        workloadFactory, memoryManager, tensorHandleFactory, outputTensorInfo, 2, useSubtensor, qScale, qOffset);

    result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(
        {
            // Batch 0, Channel 0
            1.0f, 2.0f, 7.0f, 8.0f, 13.0f, 14.0f,

            // Batch 0, Channel 1
            3.0f, 4.0f, 9.0f, 10.0f, 15.0f, 16.0f,

            // Batch 0, Channel 2
            5.0f, 6.0f, 11.0f, 12.0f, 17.0f, 18.0f,

            // Batch 1, Channel 0
            19.0f, 20.0f, 25.0f, 26.0f, 31.0f, 32.0f,

            // Batch 1, Channel 1
            21.0f, 22.0f, 27.0f, 28.0f, 33.0f, 34.0f,

            // Batch 1, Channel 2
            23.0f, 24.0f, 29.0f, 30.0f, 35.0f, 36.0f,
        },
        qScale, qOffset));

    return result;
}

template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
LayerTestResult<T, 3> Concat3dDim0DiffInputDimsTestImpl(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    float qScale,
    int32_t qOffset)
{
    // Pass qScale and qOffset to the tensor infos so their quantization matches the
    // quantized data, consistent with the other DiffInputDims tests in this file.
    TensorInfo input0TensorInfo({ 2, 3, 2 }, ArmnnType, qScale, qOffset);
    auto input0 = MakeTensor<T, 3>(input0TensorInfo, QuantizedVector<T>(
        {
            // Batch 0, Channel 0
            1.0f, 2.0f,

            // Batch 0, Channel 1
            3.0f, 4.0f,

            // Batch 0, Channel 2
            5.0f, 6.0f,

            // Batch 1, Channel 0
            19.0f, 20.0f,

            // Batch 1, Channel 1
            21.0f, 22.0f,

            // Batch 1, Channel 2
            23.0f, 24.0f
        },
        qScale, qOffset));

    TensorInfo input1TensorInfo({ 1, 3, 2 }, ArmnnType, qScale, qOffset);
    auto input1 = MakeTensor<T, 3>(input1TensorInfo, QuantizedVector<T>(
        {
            // Batch 0, Channel 0
            7.0f, 8.0f,

            // Batch 0, Channel 1
            9.0f, 10.0f,

            // Batch 0, Channel 2
            11.0f, 12.0f,
        },
        qScale, qOffset));

    TensorInfo input2TensorInfo({ 3, 3, 2 }, ArmnnType, qScale, qOffset);
    auto input2 = MakeTensor<T, 3>(input2TensorInfo, QuantizedVector<T>(
        {
            // Batch 0, Channel 0
            25.0f, 26.0f,

            // Batch 0, Channel 1
            27.0f, 28.0f,

            // Batch 0, Channel 2
            29.0f, 30.0f,

            // Batch 1, Channel 0
            13.0f, 14.0f,

            // Batch 1, Channel 1
            15.0f, 16.0f,

            // Batch 1, Channel 2
            17.0f, 18.0f,

            // Batch 2, Channel 0
            31.0f, 32.0f,

            // Batch 2, Channel 1
            33.0f, 34.0f,

            // Batch 2, Channel 2
            35.0f, 36.0f
        },
        qScale, qOffset));

    TensorInfo outputTensorInfo({ 6, 3, 2 }, ArmnnType, qScale, qOffset);
    LayerTestResult<T, 3> result(outputTensorInfo);

    std::vector<T> output;
    output.resize(outputTensorInfo.GetNumElements());
    Concatenate<T>(workloadFactory, memoryManager, tensorHandleFactory,
                   { input0TensorInfo, input1TensorInfo, input2TensorInfo },
                   { input0.data(), input1.data(), input2.data() },
                   outputTensorInfo,
                   output.data(),
                   0,
                   true);

    result.output = MakeTensor<T, 3>(outputTensorInfo, output);
    result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(
        {
            // Batch 0, Channel 0
            1.0f, 2.0f,

            // Batch 0, Channel 1
            3.0f, 4.0f,

            // Batch 0, Channel 2
            5.0f, 6.0f,

            // Batch 1, Channel 0
            19.0f, 20.0f,

            // Batch 1, Channel 1
            21.0f, 22.0f,

            // Batch 1, Channel 2
            23.0f, 24.0f,

            // Batch 2, Channel 0
            7.0f, 8.0f,

            // Batch 2, Channel 1
            9.0f, 10.0f,

            // Batch 2, Channel 2
            11.0f, 12.0f,

            // Batch 3, Channel 0
            25.0f, 26.0f,

            // Batch 3, Channel 1
            27.0f, 28.0f,

            // Batch 3, Channel 2
            29.0f, 30.0f,

            // Batch 4, Channel 0
            13.0f, 14.0f,

            // Batch 4, Channel 1
            15.0f, 16.0f,

            // Batch 4, Channel 2
            17.0f, 18.0f,

            // Batch 5, Channel 0
            31.0f, 32.0f,

            // Batch 5, Channel 1
            33.0f, 34.0f,

            // Batch 5, Channel 2
            35.0f, 36.0f
        },
        qScale, qOffset));

    return result;
}

template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
LayerTestResult<T, 3> Concat3dDim1DiffInputDimsTestImpl(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    float qScale,
    int32_t qOffset)
{
    TensorInfo input0TensorInfo({ 2, 3, 2 }, ArmnnType, qScale, qOffset);
    auto input0 = MakeTensor<T, 3>(input0TensorInfo, QuantizedVector<T>(
        {
            // Batch 0, Channel 0
            1.0f, 2.0f,

            // Batch 0, Channel 1
            3.0f, 4.0f,

            // Batch 0, Channel 2
            5.0f, 6.0f,

            // Batch 1, Channel 0
            19.0f, 20.0f,

            // Batch 1, Channel 1
            21.0f, 22.0f,

            // Batch 1, Channel 2
            23.0f, 24.0f
        },
        qScale, qOffset));

    TensorInfo input1TensorInfo({ 2, 4, 2 }, ArmnnType, qScale, qOffset);
    auto input1 = MakeTensor<T, 3>(input1TensorInfo, QuantizedVector<T>(
        {
            // Batch 0, Channel 0
            7.0f, 8.0f,

            // Batch 0, Channel 1
            9.0f, 10.0f,

            // Batch 0, Channel 2
            11.0f, 12.0f,

            // Batch 0, Channel 3
            25.0f, 26.0f,

            // Batch 1, Channel 0
            27.0f, 28.0f,

            // Batch 1, Channel 1
            29.0f, 30.0f,

            // Batch 1, Channel 2
            13.0f, 14.0f,

            // Batch 1, Channel 3
            15.0f, 16.0f,
        },
        qScale, qOffset));

    TensorInfo input2TensorInfo({ 2, 1, 2 }, ArmnnType, qScale, qOffset);
    auto input2 = MakeTensor<T, 3>(input2TensorInfo, QuantizedVector<T>(
        {
            // Batch 0, Channel 0
            17.0f, 18.0f,

            // Batch 1, Channel 0
            31.0f, 32.0f,
        },
        qScale, qOffset));

    TensorInfo outputTensorInfo({ 2, 8, 2 }, ArmnnType, qScale, qOffset);
    LayerTestResult<T, 3> result(outputTensorInfo);

    std::vector<T> output;
    output.resize(outputTensorInfo.GetNumElements());
    Concatenate<T>(workloadFactory, memoryManager, tensorHandleFactory,
                   { input0TensorInfo, input1TensorInfo, input2TensorInfo },
                   { input0.data(), input1.data(), input2.data() },
                   outputTensorInfo,
                   output.data(),
                   1,
                   true);

    result.output = MakeTensor<T, 3>(outputTensorInfo, output);
    result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(
        {
            // Batch 0, Channel 0
            1.0f, 2.0f,

            // Batch 0, Channel 1
            3.0f, 4.0f,

            // Batch 0, Channel 2
            5.0f, 6.0f,

            // Batch 0, Channel 3
            7.0f, 8.0f,

            // Batch 0, Channel 4
            9.0f, 10.0f,

            // Batch 0, Channel 5
            11.0f, 12.0f,

            // Batch 0, Channel 6
            25.0f, 26.0f,

            // Batch 0, Channel 7
            17.0f, 18.0f,

            // Batch 1, Channel 0
            19.0f, 20.0f,

            // Batch 1, Channel 1
            21.0f, 22.0f,

            // Batch 1, Channel 2
            23.0f, 24.0f,

            // Batch 1, Channel 3
            27.0f, 28.0f,

            // Batch 1, Channel 4
            29.0f, 30.0f,

            // Batch 1, Channel 5
            13.0f, 14.0f,

            // Batch 1, Channel 6
            15.0f, 16.0f,

            // Batch 1, Channel 7
            31.0f, 32.0f,
        },
        qScale, qOffset));

    return result;
}

template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
LayerTestResult<T, 3> Concat3dDim2DiffInputDimsTestImpl(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    bool useSubtensor,
    float qScale,
    int32_t qOffset)
{
    TensorInfo input0TensorInfo({ 2, 3, 2 }, ArmnnType, qScale, qOffset);
    auto input0 = MakeTensor<T, 3>(input0TensorInfo, QuantizedVector<T>(
        {
            // Batch 0, Channel 0
            1.0f, 2.0f,

            // Batch 0, Channel 1
            3.0f, 4.0f,

            // Batch 0, Channel 2
            5.0f, 6.0f,

            // Batch 1, Channel 0
            19.0f, 20.0f,

            // Batch 1, Channel 1
            21.0f, 22.0f,

            // Batch 1, Channel 2
            23.0f, 24.0f
        },
        qScale, qOffset));

    TensorInfo input1TensorInfo({ 2, 3, 1 }, ArmnnType, qScale, qOffset);
    auto input1 = MakeTensor<T, 3>(input1TensorInfo, QuantizedVector<T>(
        {
            // Batch 0, Channel 0
            7.0f,

            // Batch 0, Channel 1
            9.0f,

            // Batch 0, Channel 2
            11.0f,

            // Batch 1, Channel 0
            25.0f,

            // Batch 1, Channel 1
            27.0f,

            // Batch 1, Channel 2
            29.0f
        },
        qScale, qOffset));

    TensorInfo input2TensorInfo({ 2, 3, 3 }, ArmnnType, qScale, qOffset);
    auto input2 = MakeTensor<T, 3>(input2TensorInfo, QuantizedVector<T>(
        {
            // Batch 0, Channel 0
            13.0f, 14.0f, 50.0f,

            // Batch 0, Channel 1
            15.0f, 16.0f, 51.0f,

            // Batch 0, Channel 2
            17.0f, 18.0f, 52.0f,

            // Batch 1, Channel 0
            31.0f, 32.0f, 53.0f,

            // Batch 1, Channel 1
            33.0f, 34.0f, 54.0f,

            // Batch 1, Channel 2
            35.0f, 36.0f, 55.0f,
        },
        qScale, qOffset));

    TensorInfo outputTensorInfo({ 2, 3, 6 }, ArmnnType, qScale, qOffset);
    LayerTestResult<T, 3> result(outputTensorInfo);

    std::vector<T> output;
    output.resize(outputTensorInfo.GetNumElements());
    Concatenate<T>(workloadFactory, memoryManager, tensorHandleFactory,
                   { input0TensorInfo, input1TensorInfo, input2TensorInfo },
                   { input0.data(), input1.data(), input2.data() },
                   outputTensorInfo,
                   output.data(),
                   2,
                   useSubtensor);

    result.output = MakeTensor<T, 3>(outputTensorInfo, output);
    result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(
        {
            // Batch 0, Channel 0
            1.0f, 2.0f, 7.0f, 13.0f, 14.0f, 50.0f,

            // Batch 0, Channel 1
            3.0f, 4.0f, 9.0f, 15.0f, 16.0f, 51.0f,

            // Batch 0, Channel 2
            5.0f, 6.0f, 11.0f, 17.0f, 18.0f, 52.0f,

            // Batch 1, Channel 0
            19.0f, 20.0f, 25.0f, 31.0f, 32.0f, 53.0f,

            // Batch 1, Channel 1
            21.0f, 22.0f, 27.0f, 33.0f, 34.0f, 54.0f,

            // Batch 1, Channel 2
            23.0f, 24.0f, 29.0f, 35.0f, 36.0f, 55.0f,
        },
        qScale, qOffset));

    return result;
}

template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
LayerTestResult<T, 4> Concat4dTestImpl(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    const TensorInfo& outputTensorInfo,
    unsigned int dimension,
    bool useSubtensor,
    float qScale,
    int32_t qOffset)
{
    TensorInfo inputTensorInfo({ 1, 3, 2, 2 }, ArmnnType, qScale, qOffset);

    auto input0 = MakeTensor<T, 4>(inputTensorInfo, QuantizedVector<T>(
        {
             1.0f,  2.0f,
             3.0f,  4.0f,
             5.0f,  6.0f,
             7.0f,  8.0f,
             9.0f, 10.0f,
            11.0f, 12.0f
        },
        qScale, qOffset));

    auto input1 = MakeTensor<T, 4>(inputTensorInfo, QuantizedVector<T>(
        {
            11.0f, 12.0f,
            13.0f, 14.0f,
            15.0f, 16.0f,
            17.0f, 18.0f,
            19.0f, 20.0f,
            21.0f, 22.0f
        },
        qScale, qOffset));

    auto input2 = MakeTensor<T, 4>(inputTensorInfo, QuantizedVector<T>(
        {
            21.0f, 22.0f,
            23.0f, 24.0f,
            25.0f, 26.0f,
            27.0f, 28.0f,
            29.0f, 30.0f,
            31.0f, 32.0f
        },
        qScale, qOffset));

    LayerTestResult<T, 4> result(outputTensorInfo);

    std::vector<T> output;
    output.resize(outputTensorInfo.GetNumElements());

    Concatenate<T>(workloadFactory,
                   memoryManager,
                   tensorHandleFactory,
                   {inputTensorInfo, inputTensorInfo, inputTensorInfo},
                   {input0.data(), input1.data(), input2.data()},
                   outputTensorInfo,
                   output.data(),
                   dimension,
                   useSubtensor);

    result.output = MakeTensor<T, 4>(outputTensorInfo, output);
    return result;
}

template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
LayerTestResult<T, 4> Concat4dDim0TestImpl(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    float qScale,
    int32_t qOffset)
{
    TensorInfo outputTensorInfo({ 3, 3, 2, 2 }, ArmnnType, qScale, qOffset);

    LayerTestResult<T, 4> result = Concat4dTestImpl<ArmnnType>(
        workloadFactory, memoryManager, tensorHandleFactory, outputTensorInfo, 0, true, qScale, qOffset);

    result.outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(
        {
             1.0f,  2.0f,
             3.0f,  4.0f,
             5.0f,  6.0f,
             7.0f,  8.0f,
             9.0f, 10.0f,
            11.0f, 12.0f,

            11.0f, 12.0f,
            13.0f, 14.0f,
            15.0f, 16.0f,
            17.0f, 18.0f,
            19.0f, 20.0f,
            21.0f, 22.0f,

            21.0f, 22.0f,
            23.0f, 24.0f,
            25.0f, 26.0f,
            27.0f, 28.0f,
            29.0f, 30.0f,
            31.0f, 32.0f
        },
        qScale, qOffset));

    return result;
}
1525 
1526 template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
Concat4dDim1TestImpl(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory,float qScale,int32_t qOffset)1527 LayerTestResult<T, 4> Concat4dDim1TestImpl(
1528     IWorkloadFactory& workloadFactory,
1529     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1530     const armnn::ITensorHandleFactory& tensorHandleFactory,
1531     float qScale,
1532     int32_t qOffset)
1533 {
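    // Concatenating along dimension 1 (C) stacks the channels: 3 + 3 + 3 = 9,
    // so the flattened expected data is the three inputs back to back.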
    TensorInfo outputTensorInfo({ 1, 9, 2, 2 }, ArmnnType, qScale, qOffset);

    LayerTestResult<T, 4> result = Concat4dTestImpl<ArmnnType>(
        workloadFactory, memoryManager, tensorHandleFactory, outputTensorInfo, 1, true, qScale, qOffset);

    result.outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(
        {
             1.0f,  2.0f,
             3.0f,  4.0f,
             5.0f,  6.0f,
             7.0f,  8.0f,
             9.0f, 10.0f,
            11.0f, 12.0f,

            11.0f, 12.0f,
            13.0f, 14.0f,
            15.0f, 16.0f,
            17.0f, 18.0f,
            19.0f, 20.0f,
            21.0f, 22.0f,

            21.0f, 22.0f,
            23.0f, 24.0f,
            25.0f, 26.0f,
            27.0f, 28.0f,
            29.0f, 30.0f,
            31.0f, 32.0f
        },
        qScale, qOffset));

    return result;
}

template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
LayerTestResult<T, 4> Concat4dDim2TestImpl(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    float qScale,
    int32_t qOffset)
{
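    // Concatenating along dimension 2 (H) stacks the rows: 2 + 2 + 2 = 6.
    // Note in the expected data how, per channel, the two rows from each
    // input follow one another.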
    TensorInfo outputTensorInfo({ 1, 3, 6, 2 }, ArmnnType, qScale, qOffset);

    LayerTestResult<T, 4> result = Concat4dTestImpl<ArmnnType>(
        workloadFactory, memoryManager, tensorHandleFactory, outputTensorInfo, 2, true, qScale, qOffset);

    result.outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(
        {
             1.0f,  2.0f,
             3.0f,  4.0f,
            11.0f, 12.0f,
            13.0f, 14.0f,
            21.0f, 22.0f,
            23.0f, 24.0f,

             5.0f,  6.0f,
             7.0f,  8.0f,
            15.0f, 16.0f,
            17.0f, 18.0f,
            25.0f, 26.0f,
            27.0f, 28.0f,

             9.0f, 10.0f,
            11.0f, 12.0f,
            19.0f, 20.0f,
            21.0f, 22.0f,
            29.0f, 30.0f,
            31.0f, 32.0f
        },
        qScale, qOffset));

    return result;
}

template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
LayerTestResult<T, 4> Concat4dDim3TestImpl(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    float qScale,
    int32_t qOffset,
    bool useSubtensor)
{
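    // Concatenating along dimension 3 (W) stacks the columns: 2 + 2 + 2 = 6,
    // so each output row holds one row from each of the three inputs.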
    TensorInfo outputTensorInfo({ 1, 3, 2, 6 }, ArmnnType, qScale, qOffset);

    LayerTestResult<T, 4> result = Concat4dTestImpl<ArmnnType>(
        workloadFactory, memoryManager, tensorHandleFactory, outputTensorInfo, 3, useSubtensor, qScale, qOffset);

    result.outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(
        {
             1.0f,  2.0f,
            11.0f, 12.0f,
            21.0f, 22.0f,
             3.0f,  4.0f,
            13.0f, 14.0f,
            23.0f, 24.0f,

             5.0f,  6.0f,
            15.0f, 16.0f,
            25.0f, 26.0f,
             7.0f,  8.0f,
            17.0f, 18.0f,
            27.0f, 28.0f,

             9.0f, 10.0f,
            19.0f, 20.0f,
            29.0f, 30.0f,
            11.0f, 12.0f,
            21.0f, 22.0f,
            31.0f, 32.0f
        },
        qScale, qOffset));

    return result;
}

template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
LayerTestResult<T, 4> Concat4dDiffShapeDim0TestImpl(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    float qScale,
    int32_t qOffset)
{
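    // Inputs of batch size 1 and 2 concatenated along dimension 0 (N)
    // give an output batch size of 1 + 2 = 3.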
    constexpr unsigned int dimension = 0u;

    TensorInfo inputTensorInfo0({ 1, 3, 2, 2 }, ArmnnType, qScale, qOffset);
    auto input0 = MakeTensor<T, 4>(inputTensorInfo0, QuantizedVector<T>(
        {
             1.0f,  2.0f,
             3.0f,  4.0f,
             5.0f,  6.0f,
             7.0f,  8.0f,
             9.0f, 10.0f,
            11.0f, 12.0f
        },
        qScale, qOffset));

    TensorInfo inputTensorInfo1({ 2, 3, 2, 2 }, ArmnnType, qScale, qOffset);

    auto input1 = MakeTensor<T, 4>(inputTensorInfo1, QuantizedVector<T>(
        {
            11.0f, 12.0f,
            13.0f, 14.0f,
            15.0f, 16.0f,
            17.0f, 18.0f,
            19.0f, 20.0f,
            21.0f, 22.0f,

            21.0f, 22.0f,
            23.0f, 24.0f,
            25.0f, 26.0f,
            27.0f, 28.0f,
            29.0f, 30.0f,
            31.0f, 32.0f
        },
        qScale, qOffset));

    TensorInfo outputTensorInfo({ 3, 3, 2, 2 }, ArmnnType, qScale, qOffset);

    LayerTestResult<T, 4> result(outputTensorInfo);

    std::vector<T> output;
    output.resize(outputTensorInfo.GetNumElements());
    Concatenate<T>(workloadFactory,
                   memoryManager,
                   tensorHandleFactory,
                   {inputTensorInfo0, inputTensorInfo1},
                   {input0.data(), input1.data()},
                   outputTensorInfo,
                   output.data(),
                   dimension,
                   true);

    result.output = MakeTensor<T, 4>(outputTensorInfo, output);
    result.outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(
        {
             1.0f,  2.0f,
             3.0f,  4.0f,
             5.0f,  6.0f,
             7.0f,  8.0f,
             9.0f, 10.0f,
            11.0f, 12.0f,

            11.0f, 12.0f,
            13.0f, 14.0f,
            15.0f, 16.0f,
            17.0f, 18.0f,
            19.0f, 20.0f,
            21.0f, 22.0f,

            21.0f, 22.0f,
            23.0f, 24.0f,
            25.0f, 26.0f,
            27.0f, 28.0f,
            29.0f, 30.0f,
            31.0f, 32.0f
        },
        qScale, qOffset));

    return result;
}

template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
LayerTestResult<T, 4> Concat4dDiffShapeDim1TestImpl(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    float qScale,
    int32_t qOffset)
{
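    // Inputs of 3 and 2 channels concatenated along dimension 1 (C)
    // give an output with 3 + 2 = 5 channels.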
    constexpr unsigned int dimension = 1u;

    TensorInfo inputTensorInfo0({ 1, 3, 2, 2 }, ArmnnType, qScale, qOffset);
    auto input0 = MakeTensor<T, 4>(inputTensorInfo0, QuantizedVector<T>(
        {
             1.0f,  2.0f,
             3.0f,  4.0f,
             5.0f,  6.0f,
             7.0f,  8.0f,
             9.0f, 10.0f,
            11.0f, 12.0f
        },
        qScale, qOffset));

    TensorInfo inputTensorInfo1({ 1, 2, 2, 2 }, ArmnnType, qScale, qOffset);

    auto input1 = MakeTensor<T, 4>(inputTensorInfo1, QuantizedVector<T>(
        {
            11.0f, 12.0f,
            13.0f, 14.0f,
            15.0f, 16.0f,
            17.0f, 18.0f
        },
        qScale, qOffset));

    TensorInfo outputTensorInfo({ 1, 5, 2, 2 }, ArmnnType, qScale, qOffset);

    LayerTestResult<T, 4> result(outputTensorInfo);

    std::vector<T> output;
    output.resize(outputTensorInfo.GetNumElements());
    Concatenate<T>(workloadFactory,
                   memoryManager,
                   tensorHandleFactory,
                   {inputTensorInfo0, inputTensorInfo1},
                   {input0.data(), input1.data()},
                   outputTensorInfo,
                   output.data(),
                   dimension,
                   true);

    result.output = MakeTensor<T, 4>(outputTensorInfo, output);
    result.outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(
        {
             1.0f,  2.0f,
             3.0f,  4.0f,
             5.0f,  6.0f,
             7.0f,  8.0f,
             9.0f, 10.0f,
            11.0f, 12.0f,
            11.0f, 12.0f,
            13.0f, 14.0f,
            15.0f, 16.0f,
            17.0f, 18.0f
        },
        qScale, qOffset));

    return result;
}

template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
LayerTestResult<T, 4> Concat4dDiffShapeDim2TestImpl(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    float qScale,
    int32_t qOffset)
{
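    // Inputs of height 2 and 3 concatenated along dimension 2 (H)
    // give an output height of 2 + 3 = 5.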
    constexpr unsigned int dimension = 2u;

    TensorInfo inputTensorInfo0({ 1, 3, 2, 2 }, ArmnnType, qScale, qOffset);
    auto input0 = MakeTensor<T, 4>(inputTensorInfo0, QuantizedVector<T>(
        {
             1.0f,  2.0f,
             3.0f,  4.0f,
             5.0f,  6.0f,
             7.0f,  8.0f,
             9.0f, 10.0f,
            11.0f, 12.0f
        },
        qScale, qOffset));

    TensorInfo inputTensorInfo1({ 1, 3, 3, 2 }, ArmnnType, qScale, qOffset);
    auto input1 = MakeTensor<T, 4>(inputTensorInfo1, QuantizedVector<T>(
        {
            11.0f, 12.0f,
            13.0f, 14.0f,
            15.0f, 16.0f,
            17.0f, 18.0f,
            19.0f, 20.0f,
            21.0f, 22.0f,
            23.0f, 24.0f,
            25.0f, 26.0f,
            27.0f, 28.0f
        },
        qScale, qOffset));

    TensorInfo outputTensorInfo({ 1, 3, 5, 2 }, ArmnnType, qScale, qOffset);
    LayerTestResult<T, 4> result(outputTensorInfo);

    std::vector<T> output;
    output.resize(outputTensorInfo.GetNumElements());
    Concatenate<T>(workloadFactory,
                   memoryManager,
                   tensorHandleFactory,
                   {inputTensorInfo0, inputTensorInfo1},
                   {input0.data(), input1.data()},
                   outputTensorInfo,
                   output.data(),
                   dimension,
                   true);

    result.output         = MakeTensor<T, 4>(outputTensorInfo, output);
    result.outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(
        {
             1.0f,  2.0f,
             3.0f,  4.0f,
            11.0f, 12.0f,
            13.0f, 14.0f,
            15.0f, 16.0f,

             5.0f,  6.0f,
             7.0f,  8.0f,
            17.0f, 18.0f,
            19.0f, 20.0f,
            21.0f, 22.0f,

             9.0f, 10.0f,
            11.0f, 12.0f,
            23.0f, 24.0f,
            25.0f, 26.0f,
            27.0f, 28.0f
        },
        qScale, qOffset));

    return result;
}

template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
LayerTestResult<T, 4> Concat4dDiffShapeDim3TestImpl(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    float qScale,
    int32_t qOffset,
    bool useSubtensor)
{
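    // Inputs of width 2 and 3 concatenated along dimension 3 (W)
    // give an output width of 2 + 3 = 5.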
    constexpr unsigned int dimension = 3u;

    TensorInfo inputTensorInfo0({ 1, 3, 2, 2 }, ArmnnType, qScale, qOffset);
    auto input0 = MakeTensor<T, 4>(inputTensorInfo0, QuantizedVector<T>(
        {
             1.0f,  2.0f,
             3.0f,  4.0f,
             5.0f,  6.0f,
             7.0f,  8.0f,
             9.0f, 10.0f,
            11.0f, 12.0f
        },
        qScale, qOffset));

    TensorInfo inputTensorInfo1({ 1, 3, 2, 3 }, ArmnnType, qScale, qOffset);
    auto input1 = MakeTensor<T, 4>(inputTensorInfo1, QuantizedVector<T>(
        {
            11.0f, 12.0f, 13.0f,
            14.0f, 15.0f, 16.0f,

            17.0f, 18.0f, 19.0f,
            20.0f, 21.0f, 22.0f,

            23.0f, 24.0f, 25.0f,
            26.0f, 27.0f, 28.0f
        },
        qScale, qOffset));

    TensorInfo outputTensorInfo({ 1, 3, 2, 5 }, ArmnnType, qScale, qOffset);

    LayerTestResult<T, 4> result(outputTensorInfo);

    std::vector<T> output;
    output.resize(outputTensorInfo.GetNumElements());
    Concatenate<T>(workloadFactory,
                   memoryManager,
                   tensorHandleFactory,
                   {inputTensorInfo0, inputTensorInfo1},
                   {input0.data(), input1.data()},
                   outputTensorInfo,
                   output.data(),
                   dimension,
                   useSubtensor);

    result.output = MakeTensor<T, 4>(outputTensorInfo, output);
    result.outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(
        {
             1.0f,  2.0f, 11.0f, 12.0f, 13.0f,
             3.0f,  4.0f, 14.0f, 15.0f, 16.0f,
             5.0f,  6.0f, 17.0f, 18.0f, 19.0f,
             7.0f,  8.0f, 20.0f, 21.0f, 22.0f,
             9.0f, 10.0f, 23.0f, 24.0f, 25.0f,
            11.0f, 12.0f, 26.0f, 27.0f, 28.0f
        },
        qScale, qOffset));

    return result;
}

template<DataType ArmnnType, typename T>
LayerTestResult<T, 3> ConcatDifferentInputOutputQParamTest(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    bool useSubtensor)
{
    IgnoreUnused(memoryManager);

    // Defines the tensor descriptors.
    TensorInfo outputTensorInfo({ 3, 6, 3 }, ArmnnType);
    TensorInfo inputTensorInfo1({ 3, 6, 2 }, ArmnnType);
    TensorInfo inputTensorInfo2({ 3, 6, 1 }, ArmnnType);

    std::vector<TensorShape> inputTensorShapes({inputTensorInfo1.GetShape(), inputTensorInfo2.GetShape()});

    // Quantized input1 tensor.
    const float inputScale1 = 0.5f;
    const int32_t inputOffset1 = 5;

    auto input1 = MakeTensor<T, 3>(inputTensorInfo1, std::vector<T>(
    {
        1, 2, 3,
        4, 5, 6,
        7, 8, 9,
        10, 11, 12,
        13, 14, 15,
        16, 17, 18,

        19, 20, 21,
        22, 23, 24,
        25, 26, 27,
        28, 29, 30,
        31, 32, 33,
        34, 35, 36
    }));

    // Quantized input2 tensor.
    const float inputScale2 = 0.2f;
    const int32_t inputOffset2 = 10;

    auto input2 = MakeTensor<T, 3>(inputTensorInfo2, std::vector<T>(
    {
        37, 38, 39,
        40, 41, 42,
        43, 44, 45,
        46, 47, 48,
        49, 50, 51,
        52, 53, 54
    }));

    // Quantized output tensor.
    const float outputScale = 0.1f;
    const int32_t outputOffset = 20;

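    // Hand-derived expected values (from the quantization parameters above):
    // input1's first element q = 1 dequantizes to (1 - 5) * 0.5 = -2.0 and
    // requantizes to -2.0 / 0.1 + 20 = 0; input2's first element q = 37
    // dequantizes to (37 - 10) * 0.2 = 5.4, giving 5.4 / 0.1 + 20 = 74.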
    LayerTestResult<T, 3> ret(outputTensorInfo);

    ret.outputExpected = MakeTensor<T, 3>(outputTensorInfo, std::vector<T>(
    {
          0,   5,  74,
         10,  15,  76,
         20,  25,  78,
         30,  35,  80,
         40,  45,  82,
         50,  55,  84,

         60,  65,  86,
         70,  75,  88,
         80,  85,  90,
         90,  95,  92,
        100, 105,  94,
        110, 115,  96,

        120, 125,  98,
        130, 135, 100,
        140, 145, 102,
        150, 155, 104,
        160, 165, 106,
        170, 175, 108
    }));

    outputTensorInfo.SetQuantizationScale(outputScale);
    outputTensorInfo.SetQuantizationOffset(outputOffset);
    inputTensorInfo1.SetQuantizationScale(inputScale1);
    inputTensorInfo1.SetQuantizationOffset(inputOffset1);
    inputTensorInfo2.SetQuantizationScale(inputScale2);
    inputTensorInfo2.SetQuantizationOffset(inputOffset2);

    std::vector<unsigned int> wOrigin1 = { 0, 0, 0 }; // Extent of the window is defined by size of input[0].
    ConcatQueueDescriptor::ViewOrigin window1(wOrigin1);

    std::vector<unsigned int> wOrigin2 = { 0, 0, 2 }; // Extent of the window is defined by size of input[1].
    ConcatQueueDescriptor::ViewOrigin window2(wOrigin2);
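    // The concat axis here is 2 (the innermost dimension), so input[1]'s view
    // starts at offset 2 along that axis, immediately after input[0], which is
    // 2 elements wide there.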

    std::unique_ptr<ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);

    bool subTensorsSupported = useSubtensor && workloadFactory.SupportsSubTensors();

    std::unique_ptr<ITensorHandle> inputHandle1 =
            subTensorsSupported ?
            tensorHandleFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo1.GetShape(), wOrigin1.data()) :
            tensorHandleFactory.CreateTensorHandle(inputTensorInfo1);

    std::unique_ptr<ITensorHandle> inputHandle2 =
            subTensorsSupported ?
            tensorHandleFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo2.GetShape(), wOrigin2.data()) :
            tensorHandleFactory.CreateTensorHandle(inputTensorInfo2);

    ConcatQueueDescriptor data;
    OriginsDescriptor desc = CreateDescriptorForConcatenation(
            inputTensorShapes.begin(), inputTensorShapes.end(), 2);
    data.m_Parameters = desc;

    WorkloadInfo info;
    AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
    AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());

    data.m_ViewOrigins.push_back(window1);
    data.m_ViewOrigins.push_back(window2);

    std::unique_ptr<IWorkload> workload = workloadFactory.CreateConcat(data, info);

    inputHandle1->Allocate();
    inputHandle2->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0]);
    CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0]);

    workload->PostAllocationConfigure();
    workload->Execute();

    CopyDataFromITensorHandle(&ret.output[0][0][0], outputHandle.get());

    return ret;
}

//
// Explicit template specializations
//

template LayerTestResult<ResolveType<DataType::QAsymmU8>, 3>
ConcatDifferentInputOutputQParamTest<DataType::QAsymmU8>(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    bool useSubtensor);

template LayerTestResult<ResolveType<DataType::QSymmS16>, 3>
ConcatDifferentInputOutputQParamTest<DataType::QSymmS16>(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    bool useSubtensor);

//
// Implementation functions
//

LayerTestResult<float, 3> ConcatTest(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    IgnoreUnused(memoryManager);

    unsigned int outputWidth = 3;
    unsigned int outputHeight = 6;
    unsigned int outputChannels = 3;

    unsigned int inputWidth1 = 3;
    unsigned int inputHeight1 = 6;
    unsigned int inputChannels1 = 2;

    unsigned int inputWidth2 = 3;
    unsigned int inputHeight2 = 6;
    unsigned int inputChannels2 = 1;

    // Defines the tensor descriptors.
    TensorInfo outputTensorInfo({ outputChannels, outputHeight, outputWidth }, DataType::Float32);
    TensorInfo inputTensorInfo1({ inputChannels1, inputHeight1, inputWidth1 }, DataType::Float32);
    TensorInfo inputTensorInfo2({ inputChannels2, inputHeight2, inputWidth2 }, DataType::Float32);

    LayerTestResult<float, 3> ret(outputTensorInfo);

    ret.outputExpected = MakeTensor<float, 3>(outputTensorInfo, std::vector<float>(
        {
             1.0f,  2.0f,  3.0f,
             4.0f,  5.0f,  6.0f,
             7.0f,  8.0f,  9.0f,
            10.0f, 11.0f, 12.0f,
            13.0f, 14.0f, 15.0f,
            16.0f, 17.0f, 18.0f,

            19.0f, 20.0f, 21.0f,
            22.0f, 23.0f, 24.0f,
            25.0f, 26.0f, 27.0f,
            28.0f, 29.0f, 30.0f,
            31.0f, 32.0f, 33.0f,
            34.0f, 35.0f, 36.0f,

            37.0f, 38.0f, 39.0f,
            40.0f, 41.0f, 42.0f,
            43.0f, 44.0f, 45.0f,
            46.0f, 47.0f, 48.0f,
            49.0f, 50.0f, 51.0f,
            52.0f, 53.0f, 54.0f
        })
    );

    auto input1 = MakeTensor<float, 3>(inputTensorInfo1, std::vector<float>(
        {
             1.0f,  2.0f,  3.0f,
             4.0f,  5.0f,  6.0f,
             7.0f,  8.0f,  9.0f,
            10.0f, 11.0f, 12.0f,
            13.0f, 14.0f, 15.0f,
            16.0f, 17.0f, 18.0f,

            19.0f, 20.0f, 21.0f,
            22.0f, 23.0f, 24.0f,
            25.0f, 26.0f, 27.0f,
            28.0f, 29.0f, 30.0f,
            31.0f, 32.0f, 33.0f,
            34.0f, 35.0f, 36.0f
        })
    );

    auto input2 = MakeTensor<float, 3>(inputTensorInfo2, std::vector<float>(
        {
            37.0f, 38.0f, 39.0f,
            40.0f, 41.0f, 42.0f,
            43.0f, 44.0f, 45.0f,
            46.0f, 47.0f, 48.0f,
            49.0f, 50.0f, 51.0f,
            52.0f, 53.0f, 54.0f
        })
    );

    std::vector<unsigned int> wOrigin1 = { 0, 0, 0 }; // Extent of the window is defined by size of input[0].
    ConcatQueueDescriptor::ViewOrigin window1(wOrigin1);

    std::vector<unsigned int> wOrigin2 = { 2, 0, 0 }; // Extent of the window is defined by size of input[1].
    ConcatQueueDescriptor::ViewOrigin window2(wOrigin2);
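    // Here concatenation runs along dimension 0 (channels): input[1]'s view
    // starts at channel offset 2, directly after input[0]'s two channels.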

    std::unique_ptr<ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);

    bool subTensorsSupported = workloadFactory.SupportsSubTensors();

    std::unique_ptr<ITensorHandle> inputHandle1 =
        subTensorsSupported ?
            tensorHandleFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo1.GetShape(), wOrigin1.data()) :
            tensorHandleFactory.CreateTensorHandle(inputTensorInfo1);

    std::unique_ptr<ITensorHandle> inputHandle2 =
        subTensorsSupported ?
            tensorHandleFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo2.GetShape(), wOrigin2.data()) :
            tensorHandleFactory.CreateTensorHandle(inputTensorInfo2);

    ConcatQueueDescriptor data;
    WorkloadInfo info;
    AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
    AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());

    data.m_ViewOrigins.push_back(window1);
    data.m_ViewOrigins.push_back(window2);

    std::unique_ptr<IWorkload> workload = workloadFactory.CreateConcat(data, info);

    inputHandle1->Allocate();
    inputHandle2->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0]);
    CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0]);

    workload->PostAllocationConfigure();
    workload->Execute();

    CopyDataFromITensorHandle(&ret.output[0][0][0], outputHandle.get());

    return ret;
}

LayerTestResult<float, 1> Concat1dTest(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat1dTestImpl<DataType::Float32>(workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0);
}

LayerTestResult<float, 2> Concat2dDim0Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat2dDim0TestImpl<DataType::Float32>(workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0);
}

LayerTestResult<float, 2> Concat2dDim1Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat2dDim1TestImpl<DataType::Float32>(workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0);
}

LayerTestResult<float, 2> Concat2dDim0DiffInputDimsTest(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat2dDim0DiffInputDimsTestImpl<DataType::Float32>(workloadFactory, memoryManager,
                                                                tensorHandleFactory, 0.0f, 0);
}

LayerTestResult<float, 2> Concat2dDim1DiffInputDimsTest(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat2dDim1DiffInputDimsTestImpl<DataType::Float32>(workloadFactory,
                                                                memoryManager,
                                                                tensorHandleFactory,
                                                                0.0f,
                                                                0);
}

LayerTestResult<float, 3> Concat3dDim0Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat3dDim0TestImpl<DataType::Float32>(workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0);
}

LayerTestResult<float, 3> Concat3dDim1Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat3dDim1TestImpl<DataType::Float32>(workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0);
}

LayerTestResult<float, 3> Concat3dDim2Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    bool useSubtensor)
{
    return Concat3dDim2TestImpl<DataType::Float32>(workloadFactory, memoryManager, tensorHandleFactory,
                                                   useSubtensor, 0.0f, 0);
}

LayerTestResult<float, 3> Concat3dDim0DiffInputDimsTest(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat3dDim0DiffInputDimsTestImpl<DataType::Float32>(
        workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0);
}

LayerTestResult<float, 3> Concat3dDim1DiffInputDimsTest(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat3dDim1DiffInputDimsTestImpl<DataType::Float32>(workloadFactory, memoryManager,
                                                                tensorHandleFactory, 0.0f, 0);
}

LayerTestResult<float, 3> Concat3dDim2DiffInputDimsTest(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    bool useSubtensor)
{
    return Concat3dDim2DiffInputDimsTestImpl<DataType::Float32>(
        workloadFactory, memoryManager, tensorHandleFactory, useSubtensor, 0.0f, 0);
}

LayerTestResult<float, 4> Concat4dDim0Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat4dDim0TestImpl<DataType::Float32>(workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0);
}

LayerTestResult<float, 4> Concat4dDim1Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat4dDim1TestImpl<DataType::Float32>(workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0);
}

LayerTestResult<float, 4> Concat4dDim2Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat4dDim2TestImpl<DataType::Float32>(workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0);
}

LayerTestResult<float, 4> Concat4dDim3Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    bool useSubtensor)
{
    return Concat4dDim3TestImpl<DataType::Float32>(workloadFactory, memoryManager,
                                                   tensorHandleFactory, 0.0f, 0, useSubtensor);
}

LayerTestResult<float, 4> Concat4dDiffShapeDim0Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat4dDiffShapeDim0TestImpl<DataType::Float32>(workloadFactory, memoryManager,
                                                            tensorHandleFactory, 0.0f, 0);
}

LayerTestResult<float, 4> Concat4dDiffShapeDim1Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat4dDiffShapeDim1TestImpl<DataType::Float32>(
        workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0);
}

LayerTestResult<float, 4> Concat4dDiffShapeDim2Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat4dDiffShapeDim2TestImpl<DataType::Float32>(workloadFactory, memoryManager,
                                                            tensorHandleFactory, 0.0f, 0);
}

LayerTestResult<float, 4> Concat4dDiffShapeDim3Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    bool useSubtensor)
{
    return Concat4dDiffShapeDim3TestImpl<DataType::Float32>(
        workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0, useSubtensor);
}

LayerTestResult<Half, 3> ConcatFloat16Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat3dDim1TestImpl<DataType::Float16>(workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0);
}

LayerTestResult<BFloat16, 3> ConcatBFloat16Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat3dDim1TestImpl<DataType::BFloat16>(workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0);
}

LayerTestResult<uint8_t, 3> ConcatUint8DifferentQParamsTest(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    IgnoreUnused(memoryManager);

    unsigned int outputWidth = 3;
    unsigned int outputHeight = 6;
    unsigned int outputChannels = 3;

    unsigned int inputWidth1 = 3;
    unsigned int inputHeight1 = 6;
    unsigned int inputChannels1 = 2;

    unsigned int inputWidth2 = 3;
    unsigned int inputHeight2 = 6;
    unsigned int inputChannels2 = 1;

    // Defines the tensor descriptors.
    TensorInfo outputTensorInfo({ outputChannels, outputHeight, outputWidth }, DataType::QAsymmU8);
    TensorInfo inputTensorInfo1({ inputChannels1, inputHeight1, inputWidth1 }, DataType::QAsymmU8);
    TensorInfo inputTensorInfo2({ inputChannels2, inputHeight2, inputWidth2 }, DataType::QAsymmU8);

    // Quantized input1 tensor. Range [-3, 1]
    const float inputScale1 = 0.015686f;
    const int32_t inputOffset1 = 192;

    auto input1 = MakeTensor<uint8_t, 3>(inputTensorInfo1, std::vector<uint8_t>(
    {
        1, 2, 3,
        4, 5, 6,
        7, 8, 9,
        10, 11, 12,
        13, 14, 15,
        16, 17, 18,

        19, 20, 21,
        22, 23, 24,
        25, 26, 27,
        28, 29, 30,
        31, 32, 33,
        34, 35, 36,
    })
    );

    // Quantized input2 tensor. Range [-1, 4]
    const float inputScale2 = 0.019608f;
    const int32_t inputOffset2 = 50;

    auto input2 = MakeTensor<uint8_t, 3>(inputTensorInfo2, std::vector<uint8_t>(
    {
        37, 38, 39,
        40, 41, 42,
        43, 44, 45,
        46, 47, 48,
        49, 50, 51,
        52, 53, 54,
    })
    );

    // Output has the same quantization parameters as input1,
    // so only the requantization of input2 is required.
    const float outputScale = 0.015686f;
    const int32_t outputOffset = 192;
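    // Hand-derived check of the requantization (from the parameters above):
    // input2's first element q = 37 dequantizes to (37 - 50) * 0.019608 ~ -0.2549,
    // which requantizes to -0.2549 / 0.015686 + 192 ~ 175.75, i.e. 176 in the
    // expected output below.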
2488 
2489     LayerTestResult<uint8_t, 3> ret(outputTensorInfo);
2490 
2491     ret.outputExpected = MakeTensor<uint8_t, 3>(outputTensorInfo, std::vector<uint8_t>(
2492     {
2493         1, 2, 3,
2494         4, 5, 6,
2495         7, 8, 9,
2496         10, 11, 12,
2497         13, 14, 15,
2498         16, 17, 18,
2499 
2500         19, 20, 21,
2501         22, 23, 24,
2502         25, 26, 27,
2503         28, 29, 30,
2504         31, 32, 33,
2505         34, 35, 36,
2506 
2507         176, 177, 178,
2508         179, 181, 182,
2509         183, 184, 186,
2510         187, 188, 189,
2511         191, 192, 193,
2512         195, 196, 197,
2513     })
2514     );
2515 
2516     outputTensorInfo.SetQuantizationScale(outputScale);
2517     outputTensorInfo.SetQuantizationOffset(outputOffset);
2518     inputTensorInfo1.SetQuantizationScale(inputScale1);
2519     inputTensorInfo1.SetQuantizationOffset(inputOffset1);
2520     inputTensorInfo2.SetQuantizationScale(inputScale2);
2521     inputTensorInfo2.SetQuantizationOffset(inputOffset2);
2522 
2523     std::vector<unsigned int> wOrigin1 = { 0, 0, 0 }; //Extent of the window is defined by size of input[0].
2524     ConcatQueueDescriptor::ViewOrigin window1(wOrigin1);
2525 
2526     std::vector<unsigned int> wOrigin2 = { 2, 0, 0 }; //Extent of the window is defined by size of input[1].
2527     ConcatQueueDescriptor::ViewOrigin window2(wOrigin2);
2528 
2529     std::unique_ptr<ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);
2530 
2531     bool subTensorsSupported = workloadFactory.SupportsSubTensors();
2532 
2533     std::unique_ptr<ITensorHandle> inputHandle1 =
2534             subTensorsSupported ?
2535             tensorHandleFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo1.GetShape(), wOrigin1.data()) :
2536             tensorHandleFactory.CreateTensorHandle(inputTensorInfo1);
2537 
2538     std::unique_ptr<ITensorHandle> inputHandle2 =
2539             subTensorsSupported ?
2540             tensorHandleFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo2.GetShape(), wOrigin2.data()) :
2541             tensorHandleFactory.CreateTensorHandle(inputTensorInfo2);
2542 
2543     ConcatQueueDescriptor data;
2544     WorkloadInfo info;
2545     AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
2546     AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
2547     AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
2548 
2549     data.m_ViewOrigins.push_back(window1);
2550     data.m_ViewOrigins.push_back(window2);
2551 
2552     std::unique_ptr<IWorkload> workload = workloadFactory.CreateConcat(data, info);
2553 
2554     inputHandle1->Allocate();
2555     inputHandle2->Allocate();
2556     outputHandle->Allocate();
2557 
2558     CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0]);
2559     CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0]);
2560 
2561     workload->PostAllocationConfigure();
2562     workload->Execute();
2563 
2564     CopyDataFromITensorHandle(&ret.output[0][0][0], outputHandle.get());
2565 
2566     return ret;
2567 }
2568 
ConcatUint8Test(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory)2569 LayerTestResult<uint8_t, 3> ConcatUint8Test(
2570     IWorkloadFactory& workloadFactory,
2571     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2572     const armnn::ITensorHandleFactory& tensorHandleFactory)
2573 {
2574     IgnoreUnused(memoryManager);
2575 
2576     unsigned int outputWidth = 3;
2577     unsigned int outputHeight = 6;
2578     unsigned int outputChannels = 3;
2579 
2580     unsigned int inputWidth1 = 3;
2581     unsigned int inputHeight1 = 6;
2582     unsigned int inputChannels1 = 2;
2583 
2584     unsigned int inputWidth2 = 3;
2585     unsigned int inputHeight2 = 6;
2586     unsigned int inputChannels2 = 1;
2587 
2588     // Defines the tensor descriptors.
2589     TensorInfo outputTensorInfo({ outputChannels, outputHeight, outputWidth }, DataType::QAsymmU8);
2590     TensorInfo inputTensorInfo1({ inputChannels1, inputHeight1, inputWidth1 }, DataType::QAsymmU8);
2591     TensorInfo inputTensorInfo2({ inputChannels2, inputHeight2, inputWidth2 }, DataType::QAsymmU8);
2592 
2593     // Arbitrary scale and offsets. They don't really matter as the Concat operator doesn't dequantize/quantize them.
2594     const float scale = 0.13497836f;
2595     const int32_t offset = -7;
2596 
2597     outputTensorInfo.SetQuantizationScale(scale);
2598     outputTensorInfo.SetQuantizationOffset(offset);
2599     inputTensorInfo1.SetQuantizationScale(scale);
2600     inputTensorInfo1.SetQuantizationOffset(offset);
2601     inputTensorInfo2.SetQuantizationScale(scale);
2602     inputTensorInfo2.SetQuantizationOffset(offset);
2603 
2604     LayerTestResult<uint8_t, 3> ret(outputTensorInfo);
2605 
2606     ret.outputExpected = MakeTensor<uint8_t, 3>(outputTensorInfo, std::vector<uint8_t>(
2607         {
2608             1, 2, 3,
2609             4, 5, 6,
2610             7, 8, 9,
2611             10, 11, 12,
2612             13, 14, 15,
2613             16, 17, 18,
2614 
2615             19, 20, 21,
2616             22, 23, 24,
2617             25, 26, 27,
2618             28, 29, 30,
2619             31, 32, 33,
2620             34, 35, 36,
2621 
2622             37, 38, 39,
2623             40, 41, 42,
2624             43, 44, 45,
2625             46, 47, 48,
2626             49, 50, 51,
2627             52, 53, 54,
2628         })
2629     );
2630 
2631     auto input1 = MakeTensor<uint8_t, 3>(inputTensorInfo1, std::vector<uint8_t>(
2632     {
2633         1, 2, 3,
2634         4, 5, 6,
2635         7, 8, 9,
2636         10, 11, 12,
2637         13, 14, 15,
2638         16, 17, 18,
2639 
2640         19, 20, 21,
2641         22, 23, 24,
2642         25, 26, 27,
2643         28, 29, 30,
2644         31, 32, 33,
2645         34, 35, 36,
2646     })
2647     );
2648 
2649     auto input2 = MakeTensor<uint8_t, 3>(inputTensorInfo2, std::vector<uint8_t>(
2650     {
2651         37, 38, 39,
2652         40, 41, 42,
2653         43, 44, 45,
2654         46, 47, 48,
2655         49, 50, 51,
2656         52, 53, 54,
2657     })
2658     );
2659 
2660     std::vector<unsigned int> wOrigin1 = { 0, 0, 0 }; //Extent of the window is defined by size of input[0].
2661     ConcatQueueDescriptor::ViewOrigin window1(wOrigin1);
2662 
2663     std::vector<unsigned int> wOrigin2 = { 2, 0, 0 }; //Extent of the window is defined by size of input[1].
2664     ConcatQueueDescriptor::ViewOrigin window2(wOrigin2);
2665 
2666     std::unique_ptr<ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);
2667 
2668     bool subTensorsSupported = workloadFactory.SupportsSubTensors();
2669 
2670     std::unique_ptr<ITensorHandle> inputHandle1 =
2671         subTensorsSupported ?
2672             tensorHandleFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo1.GetShape(), wOrigin1.data()) :
2673             tensorHandleFactory.CreateTensorHandle(inputTensorInfo1);
2674 
2675     std::unique_ptr<ITensorHandle> inputHandle2 =
2676         subTensorsSupported ?
2677             tensorHandleFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo2.GetShape(), wOrigin2.data()) :
2678             tensorHandleFactory.CreateTensorHandle(inputTensorInfo2);
2679 
2680 
2681     ConcatQueueDescriptor data;
2682     WorkloadInfo info;
2683     AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
2684     AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
2685     AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
2686 
2687     data.m_ViewOrigins.push_back(window1);
2688     data.m_ViewOrigins.push_back(window2);
2689 
2690     std::unique_ptr<IWorkload> workload = workloadFactory.CreateConcat(data, info);
2691 
2692     inputHandle1->Allocate();
2693     inputHandle2->Allocate();
2694     outputHandle->Allocate();
2695 
2696     CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0]);
2697     CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0]);
2698 
2699     workload->PostAllocationConfigure();
2700     workload->Execute();
2701 
2702     CopyDataFromITensorHandle(&ret.output[0][0][0], outputHandle.get());
2703 
2704     return ret;
2705 }
2706 
ConcatUint16Test(IWorkloadFactory & workloadFactory,const IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory)2707 LayerTestResult<uint16_t, 3> ConcatUint16Test(
2708         IWorkloadFactory& workloadFactory,
2709         const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2710         const armnn::ITensorHandleFactory& tensorHandleFactory)
2711 {
2712     IgnoreUnused(memoryManager);
2713 
2714     unsigned int outputWidth = 3;
2715     unsigned int outputHeight = 6;
2716     unsigned int outputChannels = 3;
2717 
2718     unsigned int inputWidth1 = 3;
2719     unsigned int inputHeight1 = 6;
2720     unsigned int inputChannels1 = 2;
2721 
2722     unsigned int inputWidth2 = 3;
2723     unsigned int inputHeight2 = 6;
2724     unsigned int inputChannels2 = 1;
2725 
2726     // Defines the tensor descriptors.
2727     TensorInfo outputTensorInfo({ outputChannels, outputHeight, outputWidth }, DataType::QSymmS16);
2728     TensorInfo inputTensorInfo1({ inputChannels1, inputHeight1, inputWidth1 }, DataType::QSymmS16);
2729     TensorInfo inputTensorInfo2({ inputChannels2, inputHeight2, inputWidth2 }, DataType::QSymmS16);
2730 
2731     // Arbitrary scale and offsets. They don't really matter as the Concat operator doesn't dequantize/quantize them.
2732     const float scale = 0.13497836f;
2733     const int32_t offset = -7;
2734 
2735     outputTensorInfo.SetQuantizationScale(scale);
2736     outputTensorInfo.SetQuantizationOffset(offset);
2737     inputTensorInfo1.SetQuantizationScale(scale);
2738     inputTensorInfo1.SetQuantizationOffset(offset);
2739     inputTensorInfo2.SetQuantizationScale(scale);
2740     inputTensorInfo2.SetQuantizationOffset(offset);
2741 
2742     LayerTestResult<uint16_t, 3> ret(outputTensorInfo);
2743 
2744     ret.outputExpected = MakeTensor<uint16_t, 3>(outputTensorInfo, std::vector<uint16_t>(
2745     {
2746         1, 2, 3,
2747         4, 5, 6,
2748         7, 8, 9,
2749         10, 11, 12,
2750         13, 14, 15,
2751         16, 17, 18,
2752 
2753         19, 20, 21,
2754         22, 23, 24,
2755         25, 26, 27,
2756         28, 29, 30,
2757         31, 32, 33,
2758         34, 35, 36,
2759 
2760         37, 38, 39,
2761         40, 41, 42,
2762         43, 44, 45,
2763         46, 47, 48,
2764         49, 50, 51,
2765         52, 53, 54,
2766     }));
2767 
2768     auto input1 = MakeTensor<uint16_t, 3>(inputTensorInfo1, std::vector<uint16_t>(
2769     {
2770         1, 2, 3,
2771         4, 5, 6,
2772         7, 8, 9,
2773         10, 11, 12,
2774         13, 14, 15,
2775         16, 17, 18,
2776 
2777         19, 20, 21,
2778         22, 23, 24,
2779         25, 26, 27,
2780         28, 29, 30,
2781         31, 32, 33,
2782         34, 35, 36,
2783     }));
2784 
2785     auto input2 = MakeTensor<uint16_t, 3>(inputTensorInfo2, std::vector<uint16_t>(
2786     {
2787         37, 38, 39,
2788         40, 41, 42,
2789         43, 44, 45,
2790         46, 47, 48,
2791         49, 50, 51,
2792         52, 53, 54,
2793     }));
2794 
2795     std::vector<unsigned int> wOrigin1 = { 0, 0, 0 }; //Extent of the window is defined by size of input[0].
2796     ConcatQueueDescriptor::ViewOrigin window1(wOrigin1);
2797 
2798     std::vector<unsigned int> wOrigin2 = { 2, 0, 0 }; //Extent of the window is defined by size of input[1].
2799     ConcatQueueDescriptor::ViewOrigin window2(wOrigin2);
2800 
2801 
2802     std::unique_ptr<ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);
2803 
2804     bool subTensorsSupported = workloadFactory.SupportsSubTensors();
2805 
2806     std::unique_ptr<ITensorHandle> inputHandle1 =
2807             subTensorsSupported ?
2808             tensorHandleFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo1.GetShape(), wOrigin1.data()) :
2809             tensorHandleFactory.CreateTensorHandle(inputTensorInfo1);
2810 
2811     std::unique_ptr<ITensorHandle> inputHandle2 =
2812             subTensorsSupported ?
2813             tensorHandleFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo2.GetShape(), wOrigin2.data()) :
2814             tensorHandleFactory.CreateTensorHandle(inputTensorInfo2);
2815 
2816 
2817     ConcatQueueDescriptor data;
2818     WorkloadInfo info;
2819     AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
2820     AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
2821     AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
2822 
2823     data.m_ViewOrigins.push_back(window1);
2824     data.m_ViewOrigins.push_back(window2);
2825 
2826     std::unique_ptr<IWorkload> workload = workloadFactory.CreateConcat(data, info);
2827 
2828     inputHandle1->Allocate();
2829     inputHandle2->Allocate();
2830     outputHandle->Allocate();
2831 
2832     CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0]);
2833     CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0]);
2834 
2835     workload->PostAllocationConfigure();
2836     workload->Execute();
2837 
2838     CopyDataFromITensorHandle(&ret.output[0][0][0], outputHandle.get());
2839 
2840     return ret;
2841 }

LayerTestResult<uint8_t, 1> Concat1dUint8Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat1dTestImpl<DataType::QAsymmU8>(workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
}
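
//
// All of the Uint8 tests here bind the shared test templates to
// DataType::QAsymmU8 with qScale = 0.5f and qOffset = -1. Under the usual
// affine quantization scheme the test helpers therefore dequantize as
// real = qScale * (q - qOffset), i.e. real = 0.5f * (q + 1). The helper below
// is an illustrative sketch of that mapping only; it is hypothetical and is
// not called by the tests.
//
inline float DequantizeConcatTestValue(uint8_t quantizedValue)
{
    const float qScale  = 0.5f;
    const int   qOffset = -1;
    // Affine dequantization: real = scale * (quantized - offset).
    return qScale * (static_cast<float>(quantizedValue) - static_cast<float>(qOffset));
}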

LayerTestResult<uint8_t, 2> Concat2dDim0Uint8Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat2dDim0TestImpl<DataType::QAsymmU8>(workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
}

LayerTestResult<uint8_t, 2> Concat2dDim1Uint8Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat2dDim1TestImpl<DataType::QAsymmU8>(workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
}

LayerTestResult<uint8_t, 2> Concat2dDim0DiffInputDimsUint8Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat2dDim0DiffInputDimsTestImpl<DataType::QAsymmU8>(
        workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
}

LayerTestResult<uint8_t, 2> Concat2dDim1DiffInputDimsUint8Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat2dDim1DiffInputDimsTestImpl<DataType::QAsymmU8>(
        workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
}

LayerTestResult<uint8_t, 3> Concat3dDim0Uint8Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat3dDim0TestImpl<DataType::QAsymmU8>(workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
}

LayerTestResult<uint8_t, 3> Concat3dDim1Uint8Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat3dDim1TestImpl<DataType::QAsymmU8>(workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
}

LayerTestResult<uint8_t, 3> Concat3dDim2Uint8Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    bool useSubtensor)
{
    return Concat3dDim2TestImpl<DataType::QAsymmU8>(
        workloadFactory, memoryManager, tensorHandleFactory, useSubtensor, 0.5f, -1);
}

LayerTestResult<uint8_t, 3> Concat3dDim0DiffInputDimsUint8Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    // Call the DiffInputDims implementation to match the test's name; the
    // previous call to Concat3dDim0TestImpl looked like a copy-paste slip.
    return Concat3dDim0DiffInputDimsTestImpl<DataType::QAsymmU8>(
        workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
}

LayerTestResult<uint8_t, 3> Concat3dDim1DiffInputDimsUint8Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat3dDim1DiffInputDimsTestImpl<DataType::QAsymmU8>(
        workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
}

LayerTestResult<uint8_t, 3> Concat3dDim2DiffInputDimsUint8Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    bool useSubtensor)
{
    return Concat3dDim2DiffInputDimsTestImpl<DataType::QAsymmU8>(
        workloadFactory, memoryManager, tensorHandleFactory, useSubtensor, 0.5f, -1);
}

LayerTestResult<uint8_t, 4> Concat4dDim0Uint8Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat4dDim0TestImpl<DataType::QAsymmU8>(workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
}

LayerTestResult<uint8_t, 4> Concat4dDim1Uint8Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat4dDim1TestImpl<DataType::QAsymmU8>(workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
}

LayerTestResult<uint8_t, 4> Concat4dDim2Uint8Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat4dDim2TestImpl<DataType::QAsymmU8>(workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
}

LayerTestResult<uint8_t, 4> Concat4dDim3Uint8Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    bool useSubtensor)
{
    return Concat4dDim3TestImpl<DataType::QAsymmU8>(
        workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1, useSubtensor);
}
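
//
// Illustrative usage (hypothetical registrations; the real ones live in the
// per-backend unit test suites): the useSubtensor flag is typically exercised
// with both values so that the sub-tensor (zero-copy) and plain-copy input
// paths are each covered, e.g.:
//
//     ARMNN_AUTO_TEST_CASE(Concat4dDim3Uint8, Concat4dDim3Uint8Test, true)
//     ARMNN_AUTO_TEST_CASE(Concat4dDim3Uint8NoSubtensors, Concat4dDim3Uint8Test, false)
//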

LayerTestResult<uint8_t, 4> Concat4dDiffShapeDim0Uint8Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat4dDiffShapeDim0TestImpl<DataType::QAsymmU8>(
        workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
}

LayerTestResult<uint8_t, 4> Concat4dDiffShapeDim1Uint8Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat4dDiffShapeDim1TestImpl<DataType::QAsymmU8>(
        workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
}

LayerTestResult<uint8_t, 4> Concat4dDiffShapeDim2Uint8Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return Concat4dDiffShapeDim2TestImpl<DataType::QAsymmU8>(
        workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
}

LayerTestResult<uint8_t, 4> Concat4dDiffShapeDim3Uint8Test(
    IWorkloadFactory& workloadFactory,
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    bool useSubtensor)
{
    return Concat4dDiffShapeDim3TestImpl<DataType::QAsymmU8>(
        workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1, useSubtensor);
}