//
// Copyright © 2017, 2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "NeonWorkloadFactoryHelper.hpp"

#include <aclCommon/ArmComputeTensorUtils.hpp>
#include <armnn/utility/Assert.hpp>
#include <armnn/utility/IgnoreUnused.hpp>
#include <armnn/utility/PolymorphicDowncast.hpp>
#include <armnn/backends/MemCopyWorkload.hpp>

#include <aclCommon/test/CreateWorkloadClNeon.hpp>

#include <neon/NeonWorkloadFactory.hpp>
#include <neon/NeonTensorHandle.hpp>
#include <neon/workloads/NeonWorkloadUtils.hpp>
#include <neon/workloads/NeonWorkloads.hpp>

#include <doctest/doctest.h>

TEST_SUITE("CreateWorkloadNeon")
{
namespace
{

armnn::PredicateResult CompareIAclTensorHandleShape(IAclTensorHandle* tensorHandle,
                                                    std::initializer_list<unsigned int> expectedDimensions)
{
    return CompareTensorHandleShape<IAclTensorHandle>(tensorHandle, expectedDimensions);
}

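// Compares a handle's ACL tensor info (data type, number of dimensions, quantization info and
// each dimension's size) against the ArmNN TensorInfo the test expects the workload to use.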
bool TestNeonTensorHandleInfo(armnn::IAclTensorHandle* handle, const armnn::TensorInfo& expectedInfo)
{
    using namespace armnn::armcomputetensorutils;

    const arm_compute::ITensorInfo* handleInfo = handle->GetTensor().info();
    const arm_compute::TensorInfo expectedAclInfo = BuildArmComputeTensorInfo(expectedInfo);

    if (handleInfo->data_type() != expectedAclInfo.data_type())
    {
        return false;
    }

    if (handleInfo->num_dimensions() != expectedAclInfo.num_dimensions())
    {
        return false;
    }

    if (handleInfo->quantization_info() != expectedAclInfo.quantization_info())
    {
        return false;
    }

    for (std::size_t d = 0; d < expectedAclInfo.num_dimensions(); ++d)
    {
        if (handleInfo->dimension(d) != expectedAclInfo.dimension(d))
        {
            return false;
        }
    }

    return true;
}

} // namespace

template <typename armnn::DataType DataType>
static void NeonCreateActivationWorkloadTest()
{
    Graph graph;
    NeonWorkloadFactory factory =
        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateActivationWorkloadTest<NeonActivationWorkload, DataType>(factory, graph);

    // Checks that inputs/outputs are as we expect them (see definition of CreateActivationWorkloadTest).
    ActivationQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
    CHECK(TestNeonTensorHandleInfo(inputHandle, TensorInfo({1, 1}, DataType)));
    CHECK(TestNeonTensorHandleInfo(outputHandle, TensorInfo({1, 1}, DataType)));
}

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_CASE("CreateActivationFloat16Workload")
{
    NeonCreateActivationWorkloadTest<DataType::Float16>();
}
#endif

TEST_CASE("CreateActivationFloatWorkload")
{
    NeonCreateActivationWorkloadTest<DataType::Float32>();
}

template <typename WorkloadType,
          typename DescriptorType,
          typename LayerType,
          armnn::DataType DataType>
static void NeonCreateElementwiseWorkloadTest()
{
    Graph graph;
    NeonWorkloadFactory factory =
        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateElementwiseWorkloadTest<WorkloadType, DescriptorType, LayerType, DataType>(factory, graph);

    DescriptorType queueDescriptor = workload->GetData();
    auto inputHandle1 = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto inputHandle2 = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
    CHECK(TestNeonTensorHandleInfo(inputHandle1, TensorInfo({2, 3}, DataType)));
    CHECK(TestNeonTensorHandleInfo(inputHandle2, TensorInfo({2, 3}, DataType)));
    CHECK(TestNeonTensorHandleInfo(outputHandle, TensorInfo({2, 3}, DataType)));
}

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_CASE("CreateAdditionFloat16Workload")
{
    NeonCreateElementwiseWorkloadTest<NeonAdditionWorkload,
                                      AdditionQueueDescriptor,
                                      AdditionLayer,
                                      DataType::Float16>();
}
#endif

TEST_CASE("CreateAdditionFloatWorkload")
{
    NeonCreateElementwiseWorkloadTest<NeonAdditionWorkload,
                                      AdditionQueueDescriptor,
                                      AdditionLayer,
                                      DataType::Float32>();
}

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_CASE("CreateSubtractionFloat16Workload")
{
    NeonCreateElementwiseWorkloadTest<NeonSubtractionWorkload,
                                      SubtractionQueueDescriptor,
                                      SubtractionLayer,
                                      DataType::Float16>();
}
#endif

TEST_CASE("CreateSubtractionFloatWorkload")
{
    NeonCreateElementwiseWorkloadTest<NeonSubtractionWorkload,
                                      SubtractionQueueDescriptor,
                                      SubtractionLayer,
                                      DataType::Float32>();
}

TEST_CASE("CreateSubtractionUint8Workload")
{
    NeonCreateElementwiseWorkloadTest<NeonSubtractionWorkload,
                                      SubtractionQueueDescriptor,
                                      SubtractionLayer,
                                      DataType::QAsymmU8>();
}

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_CASE("CreateMultiplicationFloat16Workload")
{
    NeonCreateElementwiseWorkloadTest<NeonMultiplicationWorkload,
                                      MultiplicationQueueDescriptor,
                                      MultiplicationLayer,
                                      DataType::Float16>();
}
#endif

TEST_CASE("CreateMultiplicationFloatWorkload")
{
    NeonCreateElementwiseWorkloadTest<NeonMultiplicationWorkload,
                                      MultiplicationQueueDescriptor,
                                      MultiplicationLayer,
                                      DataType::Float32>();
}

TEST_CASE("CreateMultiplicationUint8Workload")
{
    NeonCreateElementwiseWorkloadTest<NeonMultiplicationWorkload,
                                      MultiplicationQueueDescriptor,
                                      MultiplicationLayer,
                                      DataType::QAsymmU8>();
}

TEST_CASE("CreateDivisionFloatWorkloadTest")
{
    NeonCreateElementwiseWorkloadTest<NeonDivisionWorkload,
                                      DivisionQueueDescriptor,
                                      DivisionLayer,
                                      armnn::DataType::Float32>();
}

template <typename BatchNormalizationWorkloadType, typename armnn::DataType DataType>
static void NeonCreateBatchNormalizationWorkloadTest(DataLayout dataLayout)
{
    Graph graph;
    NeonWorkloadFactory factory =
        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateBatchNormalizationWorkloadTest<BatchNormalizationWorkloadType, DataType>
                    (factory, graph, dataLayout);

    // Checks that outputs and inputs are as we expect them (see definition of CreateBatchNormalizationWorkloadTest).
    BatchNormalizationQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);

    TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 3, 4, 4} : TensorShape{2, 4, 4, 3};
    TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 3, 4, 4} : TensorShape{2, 4, 4, 3};

    CHECK(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
    CHECK(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
}

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_CASE("CreateBatchNormalizationFloat16NchwWorkload")
{
    NeonCreateBatchNormalizationWorkloadTest<NeonBatchNormalizationWorkload, DataType::Float16>(DataLayout::NCHW);
}

TEST_CASE("CreateBatchNormalizationFloat16NhwcWorkload")
{
    NeonCreateBatchNormalizationWorkloadTest<NeonBatchNormalizationWorkload, DataType::Float16>(DataLayout::NHWC);
}
#endif

TEST_CASE("CreateBatchNormalizationFloatNchwWorkload")
{
    NeonCreateBatchNormalizationWorkloadTest<NeonBatchNormalizationWorkload, DataType::Float32>(DataLayout::NCHW);
}

TEST_CASE("CreateBatchNormalizationFloatNhwcWorkload")
{
    NeonCreateBatchNormalizationWorkloadTest<NeonBatchNormalizationWorkload, DataType::Float32>(DataLayout::NHWC);
}

template <typename armnn::DataType DataType>
static void NeonCreateConvolution2dWorkloadTest(DataLayout dataLayout = DataLayout::NCHW)
{
    Graph graph;
    NeonWorkloadFactory factory =
        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateConvolution2dWorkloadTest<NeonConvolution2dWorkload, DataType>(factory, graph, dataLayout);

    TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 3, 8, 16} : TensorShape{2, 8, 16, 3};
    TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 2, 2, 10} : TensorShape{2, 2, 10, 2};

    // Checks that outputs and inputs are as we expect them (see definition of CreateConvolution2dWorkloadTest).
    Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
    CHECK(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
    CHECK(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
}

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_CASE("CreateConvolution2dFloat16NchwWorkload")
{
    NeonCreateConvolution2dWorkloadTest<DataType::Float16>();
}

TEST_CASE("CreateConvolution2dFloat16NhwcWorkload")
{
    NeonCreateConvolution2dWorkloadTest<DataType::Float16>(DataLayout::NHWC);
}
#endif

TEST_CASE("CreateConvolution2dFloatNchwWorkload")
{
    NeonCreateConvolution2dWorkloadTest<DataType::Float32>();
}

TEST_CASE("CreateConvolution2dFloatNhwcWorkload")
{
    NeonCreateConvolution2dWorkloadTest<DataType::Float32>(DataLayout::NHWC);
}

TEST_CASE("CreateConvolution2dFastMathEnabledWorkload")
{
    Graph graph;
    using ModelOptions = std::vector<BackendOptions>;
    ModelOptions modelOptions = {};
    BackendOptions cpuAcc("CpuAcc",
    {
        { "FastMathEnabled", true }
    });
    modelOptions.push_back(cpuAcc);
    NeonWorkloadFactory factory =
        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager(), modelOptions);

    auto workload =
        CreateConvolution2dWorkloadFastMathTest<NeonConvolution2dWorkload, armnn::DataType::Float32>(factory,
                                                                                                     graph,
                                                                                                     DataLayout::NCHW,
                                                                                                     modelOptions);

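    // With fast math enabled, the Compute Library is free to pick the faster Winograd method for
    // this convolution. The final assert below encodes that expectation about ACL's method
    // selection heuristics rather than a documented guarantee.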
    ARMNN_ASSERT(workload != nullptr);
    auto conv2dWorkload = PolymorphicDowncast<NeonConvolution2dWorkload*>(workload.get());
    IgnoreUnused(conv2dWorkload);
    ARMNN_ASSERT(conv2dWorkload != nullptr);
    ARMNN_ASSERT(conv2dWorkload->GetConvolutionMethod() == arm_compute::ConvolutionMethod::WINOGRAD);
}

template <typename armnn::DataType DataType>
static void NeonCreateDepthWiseConvolutionWorkloadTest(DataLayout dataLayout)
{
    Graph graph;
    NeonWorkloadFactory factory =
        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateDepthwiseConvolution2dWorkloadTest<NeonDepthwiseConvolutionWorkload,
                                                             DataType>(factory, graph, dataLayout);

    // Checks that inputs/outputs are as we expect them (see definition of CreateDepthwiseConvolution2dWorkloadTest).
    DepthwiseConvolution2dQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);

    TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 2, 2, 5, 5 })
                                                               : std::initializer_list<unsigned int>({ 2, 5, 5, 2 });
    TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 2, 2, 5, 5 })
                                                               : std::initializer_list<unsigned int>({ 2, 5, 5, 2 });

    CHECK(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
    CHECK(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
}

TEST_CASE("CreateDepthWiseConvolution2dFloat32NhwcWorkload")
{
    NeonCreateDepthWiseConvolutionWorkloadTest<DataType::Float32>(DataLayout::NHWC);
}

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_CASE("CreateDepthWiseConvolution2dFloat16NhwcWorkload")
{
    NeonCreateDepthWiseConvolutionWorkloadTest<DataType::Float16>(DataLayout::NHWC);
}
#endif

template <typename FullyConnectedWorkloadType, typename armnn::DataType DataType>
static void NeonCreateFullyConnectedWorkloadTest()
{
    Graph graph;
    NeonWorkloadFactory factory =
        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateFullyConnectedWorkloadTest<FullyConnectedWorkloadType, DataType>(factory, graph);

    // Checks that outputs and inputs are as we expect them (see definition of CreateFullyConnectedWorkloadTest).
    FullyConnectedQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);

    float inputsQScale = 1.0f;
    float outputQScale = DataType == armnn::DataType::QAsymmU8 ? 2.0f : 1.0f;
    CHECK(TestNeonTensorHandleInfo(inputHandle, TensorInfo({3, 1, 4, 5}, DataType, inputsQScale)));
    CHECK(TestNeonTensorHandleInfo(outputHandle, TensorInfo({3, 7}, DataType, outputQScale)));
}

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_CASE("CreateFullyConnectedFloat16Workload")
{
    NeonCreateFullyConnectedWorkloadTest<NeonFullyConnectedWorkload, DataType::Float16>();
}
#endif

TEST_CASE("CreateFullyConnectedFloatWorkload")
{
    NeonCreateFullyConnectedWorkloadTest<NeonFullyConnectedWorkload, DataType::Float32>();
}

TEST_CASE("CreateFullyConnectedQAsymmU8Workload")
{
    NeonCreateFullyConnectedWorkloadTest<NeonFullyConnectedWorkload, DataType::QAsymmU8>();
}

TEST_CASE("CreateFullyConnectedQAsymmS8Workload")
{
    NeonCreateFullyConnectedWorkloadTest<NeonFullyConnectedWorkload, DataType::QAsymmS8>();
}

template <typename NormalizationWorkloadType, typename armnn::DataType DataType>
static void NeonCreateNormalizationWorkloadTest(DataLayout dataLayout)
{
    Graph graph;
    NeonWorkloadFactory factory =
        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateNormalizationWorkloadTest<NormalizationWorkloadType, DataType>(factory, graph, dataLayout);

    // Checks that outputs and inputs are as we expect them (see definition of CreateNormalizationWorkloadTest).
    NormalizationQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);

    TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ? TensorShape{3, 5, 5, 1} : TensorShape{3, 1, 5, 5};
    TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{3, 5, 5, 1} : TensorShape{3, 1, 5, 5};

    CHECK(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
    CHECK(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
}

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_CASE("CreateNormalizationFloat16NchwWorkload")
{
    NeonCreateNormalizationWorkloadTest<NeonNormalizationFloatWorkload, DataType::Float16>(DataLayout::NCHW);
}

TEST_CASE("CreateNormalizationFloat16NhwcWorkload")
{
    NeonCreateNormalizationWorkloadTest<NeonNormalizationFloatWorkload, DataType::Float16>(DataLayout::NHWC);
}
#endif

TEST_CASE("CreateNormalizationFloatNchwWorkload")
{
    NeonCreateNormalizationWorkloadTest<NeonNormalizationFloatWorkload, DataType::Float32>(DataLayout::NCHW);
}

TEST_CASE("CreateNormalizationFloatNhwcWorkload")
{
    NeonCreateNormalizationWorkloadTest<NeonNormalizationFloatWorkload, DataType::Float32>(DataLayout::NHWC);
}

template <typename armnn::DataType DataType>
static void NeonCreatePooling2dWorkloadTest(DataLayout dataLayout = DataLayout::NCHW)
{
    Graph graph;
    NeonWorkloadFactory factory =
        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreatePooling2dWorkloadTest<NeonPooling2dWorkload, DataType>(factory, graph, dataLayout);

    TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ? TensorShape{3, 2, 5, 5} : TensorShape{3, 5, 5, 2};
    TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{3, 2, 2, 4} : TensorShape{3, 2, 4, 2};

    // Checks that outputs and inputs are as we expect them (see definition of CreatePooling2dWorkloadTest).
    Pooling2dQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
    CHECK(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
    CHECK(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
}

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_CASE("CreatePooling2dFloat16Workload")
{
    NeonCreatePooling2dWorkloadTest<DataType::Float16>();
}
#endif

TEST_CASE("CreatePooling2dFloatNchwWorkload")
{
    NeonCreatePooling2dWorkloadTest<DataType::Float32>(DataLayout::NCHW);
}

TEST_CASE("CreatePooling2dFloatNhwcWorkload")
{
    NeonCreatePooling2dWorkloadTest<DataType::Float32>(DataLayout::NHWC);
}

TEST_CASE("CreatePooling2dUint8NchwWorkload")
{
    NeonCreatePooling2dWorkloadTest<DataType::QAsymmU8>(DataLayout::NCHW);
}

TEST_CASE("CreatePooling2dUint8NhwcWorkload")
{
    NeonCreatePooling2dWorkloadTest<DataType::QAsymmU8>(DataLayout::NHWC);
}

static void NeonCreatePreluWorkloadTest(const armnn::TensorShape& inputShape,
                                        const armnn::TensorShape& alphaShape,
                                        const armnn::TensorShape& outputShape,
                                        armnn::DataType dataType)
{
    Graph graph;
    NeonWorkloadFactory factory =
            NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreatePreluWorkloadTest<NeonPreluWorkload>(factory,
                                                               graph,
                                                               inputShape,
                                                               alphaShape,
                                                               outputShape,
                                                               dataType);

    // Checks that outputs and inputs are as we expect them (see definition of CreatePreluWorkloadTest).
    PreluQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto alphaHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
    CHECK(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, dataType)));
    CHECK(TestNeonTensorHandleInfo(alphaHandle, TensorInfo(alphaShape, dataType)));
    CHECK(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, dataType)));
}

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_CASE("CreatePreluFloat16Workload")
{
    NeonCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, DataType::Float16);
}
#endif

TEST_CASE("CreatePreluFloatWorkload")
{
    NeonCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, DataType::Float32);
}

TEST_CASE("CreatePreluUint8Workload")
{
    NeonCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, DataType::QAsymmU8);
}

template <typename armnn::DataType DataType>
static void NeonCreateReshapeWorkloadTest()
{
    Graph graph;
    NeonWorkloadFactory factory =
        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateReshapeWorkloadTest<NeonReshapeWorkload, DataType>(factory, graph);

    // Checks that outputs and inputs are as we expect them (see definition of CreateReshapeWorkloadTest).
    ReshapeQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
    CHECK(TestNeonTensorHandleInfo(inputHandle, TensorInfo({4, 1}, DataType)));
    CHECK(TestNeonTensorHandleInfo(outputHandle, TensorInfo({1, 4}, DataType)));
}

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_CASE("CreateReshapeFloat16Workload")
{
    NeonCreateReshapeWorkloadTest<DataType::Float16>();
}
#endif

TEST_CASE("CreateReshapeFloatWorkload")
{
    NeonCreateReshapeWorkloadTest<DataType::Float32>();
}

TEST_CASE("CreateReshapeUint8Workload")
{
    NeonCreateReshapeWorkloadTest<DataType::QAsymmU8>();
}

template <typename ResizeWorkloadType, armnn::DataType DataType>
static void NeonCreateResizeWorkloadTest(DataLayout dataLayout)
{
    Graph graph;
    NeonWorkloadFactory factory =
            NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
    auto workload = CreateResizeBilinearWorkloadTest<ResizeWorkloadType, DataType>(factory, graph, dataLayout);

    auto queueDescriptor = workload->GetData();

    auto inputHandle  = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);

    armnn::PredicateResult predResult(true);
    switch (dataLayout)
    {
        case DataLayout::NHWC:
            predResult = CompareIAclTensorHandleShape(inputHandle, { 2, 4, 4, 3 });
            CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
            predResult = CompareIAclTensorHandleShape(outputHandle, { 2, 2, 2, 3 });
            CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
            break;
        default: // DataLayout::NCHW
            predResult = CompareIAclTensorHandleShape(inputHandle, { 2, 3, 4, 4 });
            CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
            predResult = CompareIAclTensorHandleShape(outputHandle, { 2, 3, 2, 2 });
            CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
    }
}

TEST_CASE("CreateResizeFloat32NchwWorkload")
{
    NeonCreateResizeWorkloadTest<NeonResizeWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
}

TEST_CASE("CreateResizeUint8NchwWorkload")
{
    NeonCreateResizeWorkloadTest<NeonResizeWorkload, armnn::DataType::QAsymmU8>(DataLayout::NCHW);
}

TEST_CASE("CreateResizeFloat32NhwcWorkload")
{
    NeonCreateResizeWorkloadTest<NeonResizeWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
}

TEST_CASE("CreateResizeUint8NhwcWorkload")
{
    NeonCreateResizeWorkloadTest<NeonResizeWorkload, armnn::DataType::QAsymmU8>(DataLayout::NHWC);
}

template <typename SoftmaxWorkloadType, typename armnn::DataType DataType>
static void NeonCreateSoftmaxWorkloadTest()
{
    Graph graph;
    NeonWorkloadFactory factory =
        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateSoftmaxWorkloadTest<SoftmaxWorkloadType, DataType>(factory, graph);

    // Checks that outputs and inputs are as we expect them (see definition of CreateSoftmaxWorkloadTest).
    SoftmaxQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
    armnn::TensorInfo tensorInfo({4, 1}, DataType);
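    // Softmax outputs lie in [0, 1), so for the quantized data types the output tensor is
    // expected to use a quantization scale of 1/256 with a zero point of 0 (QAsymmU8)
    // or -128 (QAsymmS8).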
    if (DataType == armnn::DataType::QAsymmU8)
    {
        tensorInfo.SetQuantizationOffset(0);
        tensorInfo.SetQuantizationScale(1.f / 256);
    }
    else if (DataType == armnn::DataType::QAsymmS8)
    {
        tensorInfo.SetQuantizationOffset(-128);
        tensorInfo.SetQuantizationScale(1.f / 256);
    }
    CHECK(TestNeonTensorHandleInfo(inputHandle, tensorInfo));
    CHECK(TestNeonTensorHandleInfo(outputHandle, tensorInfo));
}

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_CASE("CreateSoftmaxFloat16Workload")
{
    NeonCreateSoftmaxWorkloadTest<NeonSoftmaxWorkload, DataType::Float16>();
}
#endif

TEST_CASE("CreateSoftmaxFloatWorkload")
{
    NeonCreateSoftmaxWorkloadTest<NeonSoftmaxWorkload, DataType::Float32>();
}

TEST_CASE("CreateSoftmaxQAsymmU8Workload")
{
    NeonCreateSoftmaxWorkloadTest<NeonSoftmaxWorkload, DataType::QAsymmU8>();
}

TEST_CASE("CreateSoftmaxQAsymmS8Workload")
{
    NeonCreateSoftmaxWorkloadTest<NeonSoftmaxWorkload, DataType::QAsymmS8>();
}

template <typename SpaceToDepthWorkloadType, typename armnn::DataType DataType>
static void NeonSpaceToDepthWorkloadTest()
{
    Graph graph;
    NeonWorkloadFactory factory =
            NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateSpaceToDepthWorkloadTest<SpaceToDepthWorkloadType, DataType>(factory, graph);

    SpaceToDepthQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);

    CHECK(TestNeonTensorHandleInfo(inputHandle, TensorInfo({ 1, 2, 2, 1 }, DataType)));
    CHECK(TestNeonTensorHandleInfo(outputHandle, TensorInfo({ 1, 1, 1, 4 }, DataType)));
}

TEST_CASE("CreateSpaceToDepthFloat32Workload")
{
    NeonSpaceToDepthWorkloadTest<NeonSpaceToDepthWorkload, armnn::DataType::Float32>();
}

TEST_CASE("CreateSpaceToDepthFloat16Workload")
{
    NeonSpaceToDepthWorkloadTest<NeonSpaceToDepthWorkload, armnn::DataType::Float16>();
}

TEST_CASE("CreateSpaceToDepthQAsymm8Workload")
{
    NeonSpaceToDepthWorkloadTest<NeonSpaceToDepthWorkload, armnn::DataType::QAsymmU8>();
}

TEST_CASE("CreateSpaceToDepthQSymm16Workload")
{
    NeonSpaceToDepthWorkloadTest<NeonSpaceToDepthWorkload, armnn::DataType::QSymmS16>();
}

TEST_CASE("CreateSplitterWorkload")
{
    Graph graph;
    NeonWorkloadFactory factory =
        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateSplitterWorkloadTest<NeonSplitterWorkload, DataType::Float32>(factory, graph);

    // Checks that outputs are as we expect them (see definition of CreateSplitterWorkloadTest).
    SplitterQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
    CHECK(TestNeonTensorHandleInfo(inputHandle, TensorInfo({5, 7, 7}, DataType::Float32)));

    auto outputHandle0 = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
    CHECK(TestNeonTensorHandleInfo(outputHandle0, TensorInfo({1, 7, 7}, DataType::Float32)));

    auto outputHandle1 = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
    CHECK(TestNeonTensorHandleInfo(outputHandle1, TensorInfo({2, 7, 7}, DataType::Float32)));

    auto outputHandle2 = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[2]);
    CHECK(TestNeonTensorHandleInfo(outputHandle2, TensorInfo({2, 7, 7}, DataType::Float32)));
}

TEST_CASE("CreateSplitterConcat")
{
    // Tests that it is possible to decide which output of the splitter layer
    // should be linked to which input of the concat layer.
    // We test that it is possible to specify the 0th output of the splitter to be the 1st input
    // to the concat, and the 1st output of the splitter to be the 0th input of the concat.

    Graph graph;
    NeonWorkloadFactory factory =
        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());

    auto workloads =
        CreateSplitterConcatWorkloadTest<NeonSplitterWorkload, NeonConcatWorkload,
            DataType::Float32>(factory, graph);

    auto wlSplitter = std::move(workloads.first);
    auto wlConcat = std::move(workloads.second);

    // Checks that the index of inputs/outputs matches what we declared on InputDescriptor construction.
    armnn::IAclTensorHandle* sOut0 = dynamic_cast<armnn::IAclTensorHandle*>(wlSplitter->GetData().m_Outputs[0]);
    armnn::IAclTensorHandle* sOut1 = dynamic_cast<armnn::IAclTensorHandle*>(wlSplitter->GetData().m_Outputs[1]);
    armnn::IAclTensorHandle* mIn0 = dynamic_cast<armnn::IAclTensorHandle*>(wlConcat->GetData().m_Inputs[0]);
    armnn::IAclTensorHandle* mIn1 = dynamic_cast<armnn::IAclTensorHandle*>(wlConcat->GetData().m_Inputs[1]);

    CHECK(sOut0);
    CHECK(sOut1);
    CHECK(mIn0);
    CHECK(mIn1);

    bool validDataPointers = (sOut0 == mIn1) && (sOut1 == mIn0);

    CHECK(validDataPointers);
}

TEST_CASE("CreateSingleOutputMultipleInputs")
{
    // Tests that it is possible to assign multiple (two) different layers to each of the outputs of a splitter layer.
    // We create a splitter with two outputs, and check that each of those outputs is used by two different
    // activation layers.

    Graph graph;
    NeonWorkloadFactory factory =
        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());

    std::unique_ptr<NeonSplitterWorkload> wlSplitter;
    std::unique_ptr<NeonActivationWorkload> wlActiv0_0;
    std::unique_ptr<NeonActivationWorkload> wlActiv0_1;
    std::unique_ptr<NeonActivationWorkload> wlActiv1_0;
    std::unique_ptr<NeonActivationWorkload> wlActiv1_1;

    CreateSplitterMultipleInputsOneOutputWorkloadTest<NeonSplitterWorkload,
        NeonActivationWorkload, DataType::Float32>(factory, graph, wlSplitter, wlActiv0_0, wlActiv0_1,
                                                   wlActiv1_0, wlActiv1_1);

    armnn::IAclTensorHandle* sOut0 = dynamic_cast<armnn::IAclTensorHandle*>(wlSplitter->GetData().m_Outputs[0]);
    armnn::IAclTensorHandle* sOut1 = dynamic_cast<armnn::IAclTensorHandle*>(wlSplitter->GetData().m_Outputs[1]);
    armnn::IAclTensorHandle* activ0_0Im = dynamic_cast<armnn::IAclTensorHandle*>(wlActiv0_0->GetData().m_Inputs[0]);
    armnn::IAclTensorHandle* activ0_1Im = dynamic_cast<armnn::IAclTensorHandle*>(wlActiv0_1->GetData().m_Inputs[0]);
    armnn::IAclTensorHandle* activ1_0Im = dynamic_cast<armnn::IAclTensorHandle*>(wlActiv1_0->GetData().m_Inputs[0]);
    armnn::IAclTensorHandle* activ1_1Im = dynamic_cast<armnn::IAclTensorHandle*>(wlActiv1_1->GetData().m_Inputs[0]);

    CHECK(sOut0);
    CHECK(sOut1);
    CHECK(activ0_0Im);
    CHECK(activ0_1Im);
    CHECK(activ1_0Im);
    CHECK(activ1_1Im);

    bool validDataPointers = (sOut0 == activ0_0Im) && (sOut0 == activ0_1Im) &&
                             (sOut1 == activ1_0Im) && (sOut1 == activ1_1Im);

    CHECK(validDataPointers);
}

#if defined(ARMNNREF_ENABLED)

// This test unit needs the reference backend; it is not available if the reference backend has not been built.

TEST_CASE("CreateMemCopyWorkloadsNeon")
{
    NeonWorkloadFactory factory =
        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
    CreateMemCopyWorkloads<IAclTensorHandle>(factory);
}

#endif

template <typename L2NormalizationWorkloadType, typename armnn::DataType DataType>
static void NeonCreateL2NormalizationWorkloadTest(DataLayout dataLayout)
{
    Graph graph;
    NeonWorkloadFactory factory =
        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());

    auto workload =
            CreateL2NormalizationWorkloadTest<L2NormalizationWorkloadType, DataType>(factory, graph, dataLayout);

    // Checks that inputs/outputs are as we expect them (see definition of CreateL2NormalizationWorkloadTest).
    L2NormalizationQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);

    TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ?
                TensorShape{ 5, 20, 50, 67 } : TensorShape{ 5, 50, 67, 20 };
    TensorShape outputShape = (dataLayout == DataLayout::NCHW) ?
                TensorShape{ 5, 20, 50, 67 } : TensorShape{ 5, 50, 67, 20 };

    CHECK(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
    CHECK(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
}

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_CASE("CreateL2NormalizationFloat16NchwWorkload")
{
    NeonCreateL2NormalizationWorkloadTest<NeonL2NormalizationFloatWorkload, DataType::Float16>(DataLayout::NCHW);
}

TEST_CASE("CreateL2NormalizationFloat16NhwcWorkload")
{
    NeonCreateL2NormalizationWorkloadTest<NeonL2NormalizationFloatWorkload, DataType::Float16>(DataLayout::NHWC);
}
#endif

TEST_CASE("CreateL2NormalizationNchwWorkload")
{
    NeonCreateL2NormalizationWorkloadTest<NeonL2NormalizationFloatWorkload, DataType::Float32>(DataLayout::NCHW);
}

TEST_CASE("CreateL2NormalizationNhwcWorkload")
{
    NeonCreateL2NormalizationWorkloadTest<NeonL2NormalizationFloatWorkload, DataType::Float32>(DataLayout::NHWC);
}

template <typename LogSoftmaxWorkloadType, typename armnn::DataType DataType>
static void NeonCreateLogSoftmaxWorkloadTest()
{
    Graph graph;
    NeonWorkloadFactory factory =
        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateLogSoftmaxWorkloadTest<LogSoftmaxWorkloadType, DataType>(factory, graph);

    // Checks that outputs and inputs are as we expect them (see definition of CreateLogSoftmaxWorkloadTest).
    LogSoftmaxQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
    armnn::TensorInfo tensorInfo({4, 1}, DataType);

    CHECK(TestNeonTensorHandleInfo(inputHandle, tensorInfo));
    CHECK(TestNeonTensorHandleInfo(outputHandle, tensorInfo));
}

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_CASE("CreateLogSoftmaxFloat16Workload")
{
    NeonCreateLogSoftmaxWorkloadTest<NeonLogSoftmaxWorkload, DataType::Float16>();
}
#endif

TEST_CASE("CreateLogSoftmaxFloatWorkload")
{
    NeonCreateLogSoftmaxWorkloadTest<NeonLogSoftmaxWorkload, DataType::Float32>();
}

template <typename LstmWorkloadType>
static void NeonCreateLstmWorkloadTest()
{
    Graph graph;
    NeonWorkloadFactory factory =
            NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateLstmWorkloadTest<LstmWorkloadType>(factory, graph);

    LstmQueueDescriptor queueDescriptor = workload->GetData();

    auto inputHandle  = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);

    CHECK(TestNeonTensorHandleInfo(inputHandle, TensorInfo({ 2, 2 }, DataType::Float32)));
    CHECK(TestNeonTensorHandleInfo(outputHandle, TensorInfo({ 2, 4 }, DataType::Float32)));
}

TEST_CASE("CreateLSTMWorkloadFloatWorkload")
{
    NeonCreateLstmWorkloadTest<NeonLstmFloatWorkload>();
}

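// Concatenates two { 2, 3, 2, 5 } inputs along the axis supplied by each test case and checks
// that the resulting output handle matches the expected output shape.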
template <typename ConcatWorkloadType, armnn::DataType DataType>
static void NeonCreateConcatWorkloadTest(std::initializer_list<unsigned int> outputShape,
                                         unsigned int concatAxis)
{
    Graph graph;
    NeonWorkloadFactory factory =
        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateConcatWorkloadTest<ConcatWorkloadType, DataType>(factory, graph, outputShape, concatAxis);

    ConcatQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle0 = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto inputHandle1 = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);

    CHECK(TestNeonTensorHandleInfo(inputHandle0, TensorInfo({ 2, 3, 2, 5 }, DataType)));
    CHECK(TestNeonTensorHandleInfo(inputHandle1, TensorInfo({ 2, 3, 2, 5 }, DataType)));
    CHECK(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
}

TEST_CASE("CreateConcatDim0Float32Workload")
{
    NeonCreateConcatWorkloadTest<NeonConcatWorkload, armnn::DataType::Float32>({ 4, 3, 2, 5 }, 0);
}

TEST_CASE("CreateConcatDim1Float32Workload")
{
    NeonCreateConcatWorkloadTest<NeonConcatWorkload, armnn::DataType::Float32>({ 2, 6, 2, 5 }, 1);
}

TEST_CASE("CreateConcatDim3Float32Workload")
{
    NeonCreateConcatWorkloadTest<NeonConcatWorkload, armnn::DataType::Float32>({ 2, 3, 2, 10 }, 3);
}

TEST_CASE("CreateConcatDim0Uint8Workload")
{
    NeonCreateConcatWorkloadTest<NeonConcatWorkload, armnn::DataType::QAsymmU8>({ 4, 3, 2, 5 }, 0);
}

TEST_CASE("CreateConcatDim1Uint8Workload")
{
    NeonCreateConcatWorkloadTest<NeonConcatWorkload, armnn::DataType::QAsymmU8>({ 2, 6, 2, 5 }, 1);
}

TEST_CASE("CreateConcatDim3Uint8Workload")
{
    NeonCreateConcatWorkloadTest<NeonConcatWorkload, armnn::DataType::QAsymmU8>({ 2, 3, 2, 10 }, 3);
}

template <armnn::DataType DataType>
static void NeonCreateStackWorkloadTest(const std::initializer_list<unsigned int>& inputShape,
                                        const std::initializer_list<unsigned int>& outputShape,
                                        unsigned int axis,
                                        unsigned int numInputs)
{
    armnn::Graph graph;
    NeonWorkloadFactory factory =
            NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateStackWorkloadTest<NeonStackWorkload, DataType>(factory,
                                                                         graph,
                                                                         TensorShape(inputShape),
                                                                         TensorShape(outputShape),
                                                                         axis,
                                                                         numInputs);

    // Check inputs and output are as expected.
    StackQueueDescriptor queueDescriptor = workload->GetData();
    for (unsigned int i = 0; i < numInputs; ++i)
    {
        auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[i]);
        CHECK(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
    }
    auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
    CHECK(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
}

TEST_CASE("CreateStackFloat32Workload")
{
    NeonCreateStackWorkloadTest<armnn::DataType::Float32>({ 3, 4, 5 }, { 3, 4, 2, 5 }, 2, 2);
}

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_CASE("CreateStackFloat16Workload")
{
    NeonCreateStackWorkloadTest<armnn::DataType::Float16>({ 3, 4, 5 }, { 3, 4, 2, 5 }, 2, 2);
}
#endif

TEST_CASE("CreateStackUint8Workload")
{
    NeonCreateStackWorkloadTest<armnn::DataType::QAsymmU8>({ 3, 4, 5 }, { 3, 4, 2, 5 }, 2, 2);
}

template <typename QuantizedLstmWorkloadType>
static void NeonCreateQuantizedLstmWorkloadTest()
{
    Graph graph;
    NeonWorkloadFactory factory = NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateQuantizedLstmWorkloadTest<QuantizedLstmWorkloadType>(factory, graph);

    QuantizedLstmQueueDescriptor queueDescriptor = workload->GetData();

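    // The quantized LSTM keeps its input and output state in 8-bit asymmetric quantization and
    // its cell state in 16-bit symmetric quantization, which is what the handle checks below
    // expect from the ACL tensor handles.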
    IAclTensorHandle* inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
    CHECK((inputHandle->GetShape() == TensorShape({2, 2})));
    CHECK((inputHandle->GetDataType() == arm_compute::DataType::QASYMM8));

    IAclTensorHandle* cellStateInHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
    CHECK((cellStateInHandle->GetShape() == TensorShape({2, 4})));
    CHECK((cellStateInHandle->GetDataType() == arm_compute::DataType::QSYMM16));

    IAclTensorHandle* outputStateInHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[2]);
    CHECK((outputStateInHandle->GetShape() == TensorShape({2, 4})));
    CHECK((outputStateInHandle->GetDataType() == arm_compute::DataType::QASYMM8));

    IAclTensorHandle* cellStateOutHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
    CHECK((cellStateOutHandle->GetShape() == TensorShape({2, 4})));
    CHECK((cellStateOutHandle->GetDataType() == arm_compute::DataType::QSYMM16));

    IAclTensorHandle* outputStateOutHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
    CHECK((outputStateOutHandle->GetShape() == TensorShape({2, 4})));
    CHECK((outputStateOutHandle->GetDataType() == arm_compute::DataType::QASYMM8));
}

TEST_CASE("CreateQuantizedLstmWorkload")
{
    NeonCreateQuantizedLstmWorkloadTest<NeonQuantizedLstmWorkload>();
}

template <typename QLstmWorkloadType>
static void NeonCreateQLstmWorkloadTest()
{
    Graph graph;
    NeonWorkloadFactory factory = NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());

    auto workload = CreateQLstmWorkloadTest<QLstmWorkloadType>(factory, graph);
    QLstmQueueDescriptor queueDescriptor = workload->GetData();

    IAclTensorHandle* inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
    CHECK((inputHandle->GetShape() == TensorShape({2, 4})));
    CHECK((inputHandle->GetDataType() == arm_compute::DataType::QASYMM8_SIGNED));

    IAclTensorHandle* cellStateOutHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
    CHECK((cellStateOutHandle->GetShape() == TensorShape({2, 4})));
    CHECK((cellStateOutHandle->GetDataType() == arm_compute::DataType::QSYMM16));

    IAclTensorHandle* outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[2]);
    CHECK((outputHandle->GetShape() == TensorShape({2, 4})));
    CHECK((outputHandle->GetDataType() == arm_compute::DataType::QASYMM8_SIGNED));
}

TEST_CASE("CreateQLstmWorkloadTest")
{
    NeonCreateQLstmWorkloadTest<NeonQLstmWorkload>();
}

template <armnn::DataType DataType>
static void NeonCreateActivationWorkloadReplaceFunctionsTest()
{
    shared_ptr<NeonMemoryManager> memoryManager = make_shared<NeonMemoryManager>();

    Graph graph;
    NeonWorkloadFactory factory = NeonWorkloadFactoryHelper::GetFactory(memoryManager);
    // Input and output are created as armnn::TensorInfo tensorInfo({1, 1}, DataType).
    auto workloadPtr = CreateActivationWorkloadTest<NeonActivationWorkload, DataType>(factory, graph);

    // New input and output tensor handles are created, then passed to the workload's Replace functions.
    const NeonTensorHandleFactory tensorHandleFactory(memoryManager);
    TensorInfo inputInfo({2, 2}, DataType::Float16);
    TensorInfo outputInfo({2, 2}, DataType::Float16);
    unique_ptr<ITensorHandle> inputHandle  = tensorHandleFactory.CreateTensorHandle(inputInfo);
    inputHandle->Allocate();
    unique_ptr<ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputInfo);
    outputHandle->Allocate();

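    // Tensor handle replacement is not supported by the Neon activation workload, so both calls
    // are expected to throw UnimplementedException.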
    unsigned int slot = 0;
    CHECK_THROWS_AS(workloadPtr->ReplaceInputTensorHandle(inputHandle.get(), slot), UnimplementedException);
    CHECK_THROWS_AS(workloadPtr->ReplaceOutputTensorHandle(outputHandle.get(), slot), UnimplementedException);
}

TEST_CASE("NeonReplaceFunctionsfromFloat32toFloat16ActivationWorkload")
{
    NeonCreateActivationWorkloadReplaceFunctionsTest<armnn::DataType::Float32>();
}

TEST_CASE("NeonReplaceFunctionsfromUint8toFloat16ActivationWorkload")
{
    NeonCreateActivationWorkloadReplaceFunctionsTest<armnn::DataType::QAsymmU8>();
}

}