1 //
2 // Copyright © 2017, 2023 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5
6 #include "NeonWorkloadFactoryHelper.hpp"
7
8 #include <aclCommon/ArmComputeTensorUtils.hpp>
9 #include <armnn/utility/Assert.hpp>
10 #include <armnn/utility/IgnoreUnused.hpp>
11 #include <armnn/utility/PolymorphicDowncast.hpp>
12 #include <armnn/backends/MemCopyWorkload.hpp>
13
14 #include <aclCommon/test/CreateWorkloadClNeon.hpp>
15
16 #include <neon/NeonWorkloadFactory.hpp>
17 #include <neon/NeonTensorHandle.hpp>
18 #include <neon/workloads/NeonWorkloadUtils.hpp>
19 #include <neon/workloads/NeonWorkloads.hpp>
20
21 #include <doctest/doctest.h>
22
23 TEST_SUITE("CreateWorkloadNeon")
24 {
25 namespace
26 {
27
CompareIAclTensorHandleShape(IAclTensorHandle * tensorHandle,std::initializer_list<unsigned int> expectedDimensions)28 armnn::PredicateResult CompareIAclTensorHandleShape(IAclTensorHandle* tensorHandle,
29 std::initializer_list<unsigned int> expectedDimensions)
30 {
31 return CompareTensorHandleShape<IAclTensorHandle>(tensorHandle, expectedDimensions);
32 }
33
TestNeonTensorHandleInfo(armnn::IAclTensorHandle * handle,const armnn::TensorInfo & expectedInfo)34 bool TestNeonTensorHandleInfo(armnn::IAclTensorHandle* handle, const armnn::TensorInfo& expectedInfo)
35 {
36 using namespace armnn::armcomputetensorutils;
37
38 const arm_compute::ITensorInfo* handleInfo = handle->GetTensor().info();
39 const arm_compute::TensorInfo expectedAclInfo = BuildArmComputeTensorInfo(expectedInfo);
40
41 if (handleInfo->data_type() != expectedAclInfo.data_type())
42 {
43 return false;
44 }
45
46 if (handleInfo->num_dimensions() != expectedAclInfo.num_dimensions())
47 {
48 return false;
49 }
50
51 if (handleInfo->quantization_info() != expectedAclInfo.quantization_info())
52 {
53 return false;
54 }
55
56 for (std::size_t d = 0; d < expectedAclInfo.num_dimensions(); ++d)
57 {
58 if (handleInfo->dimension(d) != expectedAclInfo.dimension(d))
59 {
60 return false;
61 }
62 }
63
64 return true;
65 }
66
67 } // namespace
68
69 template <typename armnn::DataType DataType>
NeonCreateActivationWorkloadTest()70 static void NeonCreateActivationWorkloadTest()
71 {
72 Graph graph;
73 NeonWorkloadFactory factory =
74 NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
75
76 auto workload = CreateActivationWorkloadTest<NeonActivationWorkload, DataType>(factory, graph);
77
78 // Checks that inputs/outputs are as we expect them (see definition of CreateActivationWorkloadTest).
79 ActivationQueueDescriptor queueDescriptor = workload->GetData();
80 auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
81 auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
82 CHECK(TestNeonTensorHandleInfo(inputHandle, TensorInfo({1, 1}, DataType)));
83 CHECK(TestNeonTensorHandleInfo(outputHandle, TensorInfo({1, 1}, DataType)));
84 }
85
86 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
87 TEST_CASE("CreateActivationFloat16Workload")
88 {
89 NeonCreateActivationWorkloadTest<DataType::Float16>();
90 }
91 #endif
92
93 TEST_CASE("CreateActivationFloatWorkload")
94 {
95 NeonCreateActivationWorkloadTest<DataType::Float32>();
96 }
97
98 template <typename WorkloadType,
99 typename DescriptorType,
100 typename LayerType,
101 armnn::DataType DataType>
NeonCreateElementwiseWorkloadTest()102 static void NeonCreateElementwiseWorkloadTest()
103 {
104 Graph graph;
105 NeonWorkloadFactory factory =
106 NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
107
108 auto workload = CreateElementwiseWorkloadTest<WorkloadType, DescriptorType, LayerType, DataType>(factory, graph);
109
110 DescriptorType queueDescriptor = workload->GetData();
111 auto inputHandle1 = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
112 auto inputHandle2 = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
113 auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
114 CHECK(TestNeonTensorHandleInfo(inputHandle1, TensorInfo({2, 3}, DataType)));
115 CHECK(TestNeonTensorHandleInfo(inputHandle2, TensorInfo({2, 3}, DataType)));
116 CHECK(TestNeonTensorHandleInfo(outputHandle, TensorInfo({2, 3}, DataType)));
117 }
118
119 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
120 TEST_CASE("CreateAdditionFloat16Workload")
121 {
122 NeonCreateElementwiseWorkloadTest<NeonAdditionWorkload,
123 AdditionQueueDescriptor,
124 AdditionLayer,
125 DataType::Float16>();
126 }
127 #endif
128
129 TEST_CASE("CreateAdditionFloatWorkload")
130 {
131 NeonCreateElementwiseWorkloadTest<NeonAdditionWorkload,
132 AdditionQueueDescriptor,
133 AdditionLayer,
134 DataType::Float32>();
135 }
136
137 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
138 TEST_CASE("CreateSubtractionFloat16Workload")
139 {
140 NeonCreateElementwiseWorkloadTest<NeonSubtractionWorkload,
141 SubtractionQueueDescriptor,
142 SubtractionLayer,
143 DataType::Float16>();
144 }
145 #endif
146
147 TEST_CASE("CreateSubtractionFloatWorkload")
148 {
149 NeonCreateElementwiseWorkloadTest<NeonSubtractionWorkload,
150 SubtractionQueueDescriptor,
151 SubtractionLayer,
152 DataType::Float32>();
153 }
154
155 TEST_CASE("CreateSubtractionUint8Workload")
156 {
157 NeonCreateElementwiseWorkloadTest<NeonSubtractionWorkload,
158 SubtractionQueueDescriptor,
159 SubtractionLayer,
160 DataType::QAsymmU8>();
161 }
162
163 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
164 TEST_CASE("CreateMultiplicationFloat16Workload")
165 {
166 NeonCreateElementwiseWorkloadTest<NeonMultiplicationWorkload,
167 MultiplicationQueueDescriptor,
168 MultiplicationLayer,
169 DataType::Float16>();
170 }
171 #endif
172
173 TEST_CASE("CreateMultiplicationFloatWorkload")
174 {
175 NeonCreateElementwiseWorkloadTest<NeonMultiplicationWorkload,
176 MultiplicationQueueDescriptor,
177 MultiplicationLayer,
178 DataType::Float32>();
179 }
180
181 TEST_CASE("CreateMultiplicationUint8Workload")
182 {
183 NeonCreateElementwiseWorkloadTest<NeonMultiplicationWorkload,
184 MultiplicationQueueDescriptor,
185 MultiplicationLayer,
186 DataType::QAsymmU8>();
187 }
188
189 TEST_CASE("CreateDivisionFloatWorkloadTest")
190 {
191 NeonCreateElementwiseWorkloadTest<NeonDivisionWorkload,
192 DivisionQueueDescriptor,
193 DivisionLayer,
194 armnn::DataType::Float32>();
195 }
196
197 template <typename BatchNormalizationWorkloadType, typename armnn::DataType DataType>
NeonCreateBatchNormalizationWorkloadTest(DataLayout dataLayout)198 static void NeonCreateBatchNormalizationWorkloadTest(DataLayout dataLayout)
199 {
200 Graph graph;
201 NeonWorkloadFactory factory =
202 NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
203
204 auto workload = CreateBatchNormalizationWorkloadTest<BatchNormalizationWorkloadType, DataType>
205 (factory, graph, dataLayout);
206
207 // Checks that outputs and inputs are as we expect them (see definition of CreateBatchNormalizationWorkloadTest).
208 BatchNormalizationQueueDescriptor queueDescriptor = workload->GetData();
209 auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
210 auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
211
212 TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 3, 4, 4} : TensorShape{2, 4, 4, 3};
213 TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 3, 4, 4} : TensorShape{2, 4, 4, 3};
214
215 CHECK(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
216 CHECK(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
217 }
218
219 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
220 TEST_CASE("CreateBatchNormalizationFloat16NchwWorkload")
221 {
222 NeonCreateBatchNormalizationWorkloadTest<NeonBatchNormalizationWorkload, DataType::Float16>(DataLayout::NCHW);
223 }
224
225 TEST_CASE("CreateBatchNormalizationFloat16NhwcWorkload")
226 {
227 NeonCreateBatchNormalizationWorkloadTest<NeonBatchNormalizationWorkload, DataType::Float16>(DataLayout::NHWC);
228 }
229 #endif
230
231 TEST_CASE("CreateBatchNormalizationFloatNchwWorkload")
232 {
233 NeonCreateBatchNormalizationWorkloadTest<NeonBatchNormalizationWorkload, DataType::Float32>(DataLayout::NCHW);
234 }
235
236 TEST_CASE("CreateBatchNormalizationFloatNhwcWorkload")
237 {
238 NeonCreateBatchNormalizationWorkloadTest<NeonBatchNormalizationWorkload, DataType::Float32>(DataLayout::NHWC);
239 }
240
241 template <typename armnn::DataType DataType>
NeonCreateConvolution2dWorkloadTest(DataLayout dataLayout=DataLayout::NCHW)242 static void NeonCreateConvolution2dWorkloadTest(DataLayout dataLayout = DataLayout::NCHW)
243 {
244 Graph graph;
245 NeonWorkloadFactory factory =
246 NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
247
248 auto workload = CreateConvolution2dWorkloadTest<NeonConvolution2dWorkload, DataType>(factory, graph, dataLayout);
249
250 TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 3, 8, 16} : TensorShape{2, 8, 16, 3};
251 TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 2, 2, 10} : TensorShape{2, 2, 10, 2};
252
253 // Checks that outputs and inputs are as we expect them (see definition of CreateConvolution2dWorkloadTest).
254 Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
255 auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
256 auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
257 CHECK(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
258 CHECK(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
259 }
260
261 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
262 TEST_CASE("CreateConvolution2dFloat16NchwWorkload")
263 {
264 NeonCreateConvolution2dWorkloadTest<DataType::Float16>();
265 }
266
267 TEST_CASE("CreateConvolution2dFloat16NhwcWorkload")
268 {
269 NeonCreateConvolution2dWorkloadTest<DataType::Float16>(DataLayout::NHWC);
270 }
271
272 #endif
273 TEST_CASE("CreateConvolution2dFloatNchwWorkload")
274 {
275 NeonCreateConvolution2dWorkloadTest<DataType::Float32>();
276 }
277
278 TEST_CASE("CreateConvolution2dFloatNhwcWorkload")
279 {
280 NeonCreateConvolution2dWorkloadTest<DataType::Float32>(DataLayout::NHWC);
281 }
282
283 TEST_CASE("CreateConvolution2dFastMathEnabledWorkload")
284 {
285 Graph graph;
286 using ModelOptions = std::vector<BackendOptions>;
287 ModelOptions modelOptions = {};
288 BackendOptions cpuAcc("CpuAcc",
289 {
290 { "FastMathEnabled", true }
291 });
292 modelOptions.push_back(cpuAcc);
293 NeonWorkloadFactory factory =
294 NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager(), modelOptions);
295
296 auto workload =
297 CreateConvolution2dWorkloadFastMathTest<NeonConvolution2dWorkload, armnn::DataType::Float32>(factory,
298 graph,
299 DataLayout::NCHW,
300 modelOptions);
301
302 ARMNN_ASSERT(workload != nullptr);
303 auto conv2dWorkload = PolymorphicDowncast<NeonConvolution2dWorkload*>(workload.get());
304 IgnoreUnused(conv2dWorkload);
305 ARMNN_ASSERT(conv2dWorkload != nullptr);
306 ARMNN_ASSERT(conv2dWorkload->GetConvolutionMethod() == arm_compute::ConvolutionMethod::WINOGRAD);
307 }
308
309 template <typename armnn::DataType DataType>
NeonCreateDepthWiseConvolutionWorkloadTest(DataLayout dataLayout)310 static void NeonCreateDepthWiseConvolutionWorkloadTest(DataLayout dataLayout)
311 {
312 Graph graph;
313 NeonWorkloadFactory factory =
314 NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
315
316 auto workload = CreateDepthwiseConvolution2dWorkloadTest<NeonDepthwiseConvolutionWorkload,
317 DataType>(factory, graph, dataLayout);
318
319 // Checks that inputs/outputs are as we expect them (see definition of CreateNormalizationWorkloadTest).
320 DepthwiseConvolution2dQueueDescriptor queueDescriptor = workload->GetData();
321 auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
322 auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
323
324 TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 2, 2, 5, 5 })
325 : std::initializer_list<unsigned int>({ 2, 5, 5, 2 });
326 TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 2, 2, 5, 5 })
327 : std::initializer_list<unsigned int>({ 2, 5, 5, 2 });
328
329 CHECK(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
330 CHECK(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
331 }
332
333 TEST_CASE("CreateDepthWiseConvolution2dFloat32NhwcWorkload")
334 {
335 NeonCreateDepthWiseConvolutionWorkloadTest<DataType::Float32>(DataLayout::NHWC);
336 }
337
338 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
339 TEST_CASE("CreateDepthWiseConvolution2dFloat16NhwcWorkload")
340 {
341 NeonCreateDepthWiseConvolutionWorkloadTest<DataType::Float16>(DataLayout::NHWC);
342 }
343 #endif
344
345 template <typename FullyConnectedWorkloadType, typename armnn::DataType DataType>
NeonCreateFullyConnectedWorkloadTest()346 static void NeonCreateFullyConnectedWorkloadTest()
347 {
348 Graph graph;
349 NeonWorkloadFactory factory =
350 NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
351
352 auto workload = CreateFullyConnectedWorkloadTest<FullyConnectedWorkloadType, DataType>(factory, graph);
353
354 // Checks that outputs and inputs are as we expect them (see definition of CreateFullyConnectedWorkloadTest).
355 FullyConnectedQueueDescriptor queueDescriptor = workload->GetData();
356 auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
357 auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
358
359 // Checks that outputs and inputs are as we expect them (see definition of CreateFullyConnectedWorkloadTest).
360 float inputsQScale = 1.0f;
361 float outputQScale = DataType == armnn::DataType::QAsymmU8 ? 2.0f : 1.0;
362 CHECK(TestNeonTensorHandleInfo(inputHandle, TensorInfo({3, 1, 4, 5}, DataType, inputsQScale)));
363 CHECK(TestNeonTensorHandleInfo(outputHandle, TensorInfo({3, 7}, DataType, outputQScale)));
364 }
365
366 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
367 TEST_CASE("CreateFullyConnectedFloat16Workload")
368 {
369 NeonCreateFullyConnectedWorkloadTest<NeonFullyConnectedWorkload, DataType::Float16>();
370 }
371 #endif
372
373 TEST_CASE("CreateFullyConnectedFloatWorkload")
374 {
375 NeonCreateFullyConnectedWorkloadTest<NeonFullyConnectedWorkload, DataType::Float32>();
376 }
377
378 TEST_CASE("CreateFullyConnectedQAsymmU8Workload")
379 {
380 NeonCreateFullyConnectedWorkloadTest<NeonFullyConnectedWorkload, DataType::QAsymmU8>();
381 }
382
383 TEST_CASE("CreateFullyConnectedQAsymmS8Workload")
384 {
385 NeonCreateFullyConnectedWorkloadTest<NeonFullyConnectedWorkload, DataType::QAsymmS8>();
386 }
387
388 template <typename NormalizationWorkloadType, typename armnn::DataType DataType>
NeonCreateNormalizationWorkloadTest(DataLayout dataLayout)389 static void NeonCreateNormalizationWorkloadTest(DataLayout dataLayout)
390 {
391 Graph graph;
392 NeonWorkloadFactory factory =
393 NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
394
395 auto workload = CreateNormalizationWorkloadTest<NormalizationWorkloadType, DataType>(factory, graph, dataLayout);
396
397 // Checks that outputs and inputs are as we expect them (see definition of CreateNormalizationWorkloadTest).
398 NormalizationQueueDescriptor queueDescriptor = workload->GetData();
399 auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
400 auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
401
402 TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{3, 5, 5, 1} : TensorShape{3, 1, 5, 5};
403 TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{3, 5, 5, 1} : TensorShape{3, 1, 5, 5};
404
405 CHECK(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
406 CHECK(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
407 }
408
409 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
410 TEST_CASE("CreateNormalizationFloat16NchwWorkload")
411 {
412 NeonCreateNormalizationWorkloadTest<NeonNormalizationFloatWorkload, DataType::Float16>(DataLayout::NCHW);
413 }
414
415 TEST_CASE("CreateNormalizationFloat16NhwcWorkload")
416 {
417 NeonCreateNormalizationWorkloadTest<NeonNormalizationFloatWorkload, DataType::Float16>(DataLayout::NHWC);
418 }
419 #endif
420
421 TEST_CASE("CreateNormalizationFloatNchwWorkload")
422 {
423 NeonCreateNormalizationWorkloadTest<NeonNormalizationFloatWorkload, DataType::Float32>(DataLayout::NCHW);
424 }
425
426 TEST_CASE("CreateNormalizationFloatNhwcWorkload")
427 {
428 NeonCreateNormalizationWorkloadTest<NeonNormalizationFloatWorkload, DataType::Float32>(DataLayout::NHWC);
429 }
430
431
432 template <typename armnn::DataType DataType>
NeonCreatePooling2dWorkloadTest(DataLayout dataLayout=DataLayout::NCHW)433 static void NeonCreatePooling2dWorkloadTest(DataLayout dataLayout = DataLayout::NCHW)
434 {
435 Graph graph;
436 NeonWorkloadFactory factory =
437 NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
438
439 auto workload = CreatePooling2dWorkloadTest<NeonPooling2dWorkload, DataType>(factory, graph, dataLayout);
440
441 TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{3, 2, 5, 5} : TensorShape{3, 5, 5, 2};
442 TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{3, 2, 2, 4} : TensorShape{3, 2, 4, 2};
443
444 // Checks that outputs and inputs are as we expect them (see definition of CreatePooling2dWorkloadTest).
445 Pooling2dQueueDescriptor queueDescriptor = workload->GetData();
446 auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
447 auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
448 CHECK(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
449 CHECK(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
450 }
451
452 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
453 TEST_CASE("CreatePooling2dFloat16Workload")
454 {
455 NeonCreatePooling2dWorkloadTest<DataType::Float16>();
456 }
457 #endif
458
459 TEST_CASE("CreatePooling2dFloatNchwWorkload")
460 {
461 NeonCreatePooling2dWorkloadTest<DataType::Float32>(DataLayout::NCHW);
462 }
463
464 TEST_CASE("CreatePooling2dFloatNhwcWorkload")
465 {
466 NeonCreatePooling2dWorkloadTest<DataType::Float32>(DataLayout::NHWC);
467 }
468
469 TEST_CASE("CreatePooling2dUint8NchwWorkload")
470 {
471 NeonCreatePooling2dWorkloadTest<DataType::QAsymmU8>(DataLayout::NCHW);
472 }
473
474 TEST_CASE("CreatePooling2dUint8NhwcWorkload")
475 {
476 NeonCreatePooling2dWorkloadTest<DataType::QAsymmU8>(DataLayout::NHWC);
477 }
478
NeonCreatePreluWorkloadTest(const armnn::TensorShape & inputShape,const armnn::TensorShape & alphaShape,const armnn::TensorShape & outputShape,armnn::DataType dataType)479 static void NeonCreatePreluWorkloadTest(const armnn::TensorShape& inputShape,
480 const armnn::TensorShape& alphaShape,
481 const armnn::TensorShape& outputShape,
482 armnn::DataType dataType)
483 {
484 Graph graph;
485 NeonWorkloadFactory factory =
486 NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
487
488 auto workload = CreatePreluWorkloadTest<NeonPreluWorkload>(factory,
489 graph,
490 inputShape,
491 alphaShape,
492 outputShape,
493 dataType);
494
495 // Checks that outputs and inputs are as we expect them (see definition of CreateReshapeWorkloadTest).
496 PreluQueueDescriptor queueDescriptor = workload->GetData();
497 auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
498 auto alphaHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
499 auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
500 CHECK(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, dataType)));
501 CHECK(TestNeonTensorHandleInfo(alphaHandle, TensorInfo(alphaShape, dataType)));
502 CHECK(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, dataType)));
503 }
504
505 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
506 TEST_CASE("CreatePreluFloat16Workload")
507 {
508 NeonCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, DataType::Float16);
509 }
510 #endif
511
512 TEST_CASE("CreatePreluFloatWorkload")
513 {
514 NeonCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, DataType::Float32);
515 }
516
517 TEST_CASE("CreatePreluUint8Workload")
518 {
519 NeonCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, DataType::QAsymmU8);
520 }
521
522 template <typename armnn::DataType DataType>
NeonCreateReshapeWorkloadTest()523 static void NeonCreateReshapeWorkloadTest()
524 {
525 Graph graph;
526 NeonWorkloadFactory factory =
527 NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
528
529 auto workload = CreateReshapeWorkloadTest<NeonReshapeWorkload, DataType>(factory, graph);
530
531 // Checks that outputs and inputs are as we expect them (see definition of CreateReshapeWorkloadTest).
532 ReshapeQueueDescriptor queueDescriptor = workload->GetData();
533 auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
534 auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
535 CHECK(TestNeonTensorHandleInfo(inputHandle, TensorInfo({4, 1}, DataType)));
536 CHECK(TestNeonTensorHandleInfo(outputHandle, TensorInfo({1, 4}, DataType)));
537 }
538
539 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
540 TEST_CASE("CreateReshapeFloat16Workload")
541 {
542 NeonCreateReshapeWorkloadTest<DataType::Float16>();
543 }
544 #endif
545
546 TEST_CASE("CreateReshapeFloatWorkload")
547 {
548 NeonCreateReshapeWorkloadTest<DataType::Float32>();
549 }
550
551 TEST_CASE("CreateReshapeUint8Workload")
552 {
553 NeonCreateReshapeWorkloadTest<DataType::QAsymmU8>();
554 }
555
556 template <typename ResizeWorkloadType, armnn::DataType DataType>
NeonCreateResizeWorkloadTest(DataLayout dataLayout)557 static void NeonCreateResizeWorkloadTest(DataLayout dataLayout)
558 {
559 Graph graph;
560 NeonWorkloadFactory factory =
561 NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
562 auto workload = CreateResizeBilinearWorkloadTest<ResizeWorkloadType, DataType>(factory, graph, dataLayout);
563
564 auto queueDescriptor = workload->GetData();
565
566 auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
567 auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
568
569 armnn::PredicateResult predResult(true);
570 switch (dataLayout)
571 {
572 case DataLayout::NHWC:
573 predResult = CompareIAclTensorHandleShape(inputHandle, { 2, 4, 4, 3 });
574 CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
575 predResult = CompareIAclTensorHandleShape(outputHandle, { 2, 2, 2, 3 });
576 CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
577 break;
578 default: // DataLayout::NCHW
579 predResult = CompareIAclTensorHandleShape(inputHandle, { 2, 3, 4, 4 });
580 CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
581 predResult = CompareIAclTensorHandleShape(outputHandle, { 2, 3, 2, 2 });
582 CHECK_MESSAGE(predResult.m_Result, predResult.m_Message.str());
583 }
584 }
585
586 TEST_CASE("CreateResizeFloat32NchwWorkload")
587 {
588 NeonCreateResizeWorkloadTest<NeonResizeWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
589 }
590
591 TEST_CASE("CreateResizeUint8NchwWorkload")
592 {
593 NeonCreateResizeWorkloadTest<NeonResizeWorkload, armnn::DataType::QAsymmU8>(DataLayout::NCHW);
594 }
595
596 TEST_CASE("CreateResizeFloat32NhwcWorkload")
597 {
598 NeonCreateResizeWorkloadTest<NeonResizeWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
599 }
600
601 TEST_CASE("CreateResizeUint8NhwcWorkload")
602 {
603 NeonCreateResizeWorkloadTest<NeonResizeWorkload, armnn::DataType::QAsymmU8>(DataLayout::NHWC);
604 }
605
606 template <typename SoftmaxWorkloadType, typename armnn::DataType DataType>
NeonCreateSoftmaxWorkloadTest()607 static void NeonCreateSoftmaxWorkloadTest()
608 {
609 Graph graph;
610 NeonWorkloadFactory factory =
611 NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
612
613 auto workload = CreateSoftmaxWorkloadTest<SoftmaxWorkloadType, DataType>(factory, graph);
614
615 // Checks that outputs and inputs are as we expect them (see definition of CreateSoftmaxWorkloadTest).
616 SoftmaxQueueDescriptor queueDescriptor = workload->GetData();
617 auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
618 auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
619 armnn::TensorInfo tensorInfo({4, 1}, DataType);
620 if (DataType == armnn::DataType::QAsymmU8)
621 {
622 tensorInfo.SetQuantizationOffset(0);
623 tensorInfo.SetQuantizationScale(1.f / 256);
624 }
625 else if (DataType == armnn::DataType::QAsymmS8)
626 {
627 tensorInfo.SetQuantizationOffset(-128);
628 tensorInfo.SetQuantizationScale(1.f / 256);
629 }
630 CHECK(TestNeonTensorHandleInfo(inputHandle, tensorInfo));
631 CHECK(TestNeonTensorHandleInfo(outputHandle, tensorInfo));
632 }
633
634 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
635 TEST_CASE("CreateSoftmaxFloat16Workload")
636 {
637 NeonCreateSoftmaxWorkloadTest<NeonSoftmaxWorkload, DataType::Float16>();
638 }
639 #endif
640
641 TEST_CASE("CreateSoftmaxFloatWorkload")
642 {
643 NeonCreateSoftmaxWorkloadTest<NeonSoftmaxWorkload, DataType::Float32>();
644 }
645
646 TEST_CASE("CreateSoftmaxQAsymmU8Workload")
647 {
648 NeonCreateSoftmaxWorkloadTest<NeonSoftmaxWorkload, DataType::QAsymmU8>();
649 }
650
651 TEST_CASE("CreateSoftmaxQAsymmS8Workload")
652 {
653 NeonCreateSoftmaxWorkloadTest<NeonSoftmaxWorkload, DataType::QAsymmS8>();
654 }
655
656 template <typename SpaceToDepthWorkloadType, typename armnn::DataType DataType>
NeonSpaceToDepthWorkloadTest()657 static void NeonSpaceToDepthWorkloadTest()
658 {
659 Graph graph;
660 NeonWorkloadFactory factory =
661 NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
662
663 auto workload = CreateSpaceToDepthWorkloadTest<SpaceToDepthWorkloadType, DataType>(factory, graph);
664
665 SpaceToDepthQueueDescriptor queueDescriptor = workload->GetData();
666 auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
667 auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
668
669 CHECK(TestNeonTensorHandleInfo(inputHandle, TensorInfo({ 1, 2, 2, 1 }, DataType)));
670 CHECK(TestNeonTensorHandleInfo(outputHandle, TensorInfo({ 1, 1, 1, 4 }, DataType)));
671 }
672
673 TEST_CASE("CreateSpaceToDepthFloat32Workload")
674 {
675 NeonSpaceToDepthWorkloadTest<NeonSpaceToDepthWorkload, armnn::DataType::Float32>();
676 }
677
678 TEST_CASE("CreateSpaceToDepthFloat16Workload")
679 {
680 NeonSpaceToDepthWorkloadTest<NeonSpaceToDepthWorkload, armnn::DataType::Float16>();
681 }
682
683 TEST_CASE("CreateSpaceToDepthQAsymm8Workload")
684 {
685 NeonSpaceToDepthWorkloadTest<NeonSpaceToDepthWorkload, armnn::DataType::QAsymmU8>();
686 }
687
688 TEST_CASE("CreateSpaceToDepthQSymm16Workload")
689 {
690 NeonSpaceToDepthWorkloadTest<NeonSpaceToDepthWorkload, armnn::DataType::QSymmS16>();
691 }
692
693 TEST_CASE("CreateSplitterWorkload")
694 {
695 Graph graph;
696 NeonWorkloadFactory factory =
697 NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
698
699 auto workload = CreateSplitterWorkloadTest<NeonSplitterWorkload, DataType::Float32>(factory, graph);
700
701 // Checks that outputs are as we expect them (see definition of CreateSplitterWorkloadTest).
702 SplitterQueueDescriptor queueDescriptor = workload->GetData();
703 auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
704 CHECK(TestNeonTensorHandleInfo(inputHandle, TensorInfo({5, 7, 7}, DataType::Float32)));
705
706 auto outputHandle0 = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
707 CHECK(TestNeonTensorHandleInfo(outputHandle0, TensorInfo({1, 7, 7}, DataType::Float32)));
708
709 auto outputHandle1 = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
710 CHECK(TestNeonTensorHandleInfo(outputHandle1, TensorInfo({2, 7, 7}, DataType::Float32)));
711
712 auto outputHandle2 = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[2]);
713 CHECK(TestNeonTensorHandleInfo(outputHandle2, TensorInfo({2, 7, 7}, DataType::Float32)));
714 }
715
716 TEST_CASE("CreateSplitterConcat")
717 {
718 // Tests that it is possible to decide which output of the splitter layer
719 // should be lined to which input of the concat layer.
720 // We tested that is is possible to specify 0th output
721 // of the splitter to be the 1st input to the concat, and the 1st output of the splitter to be 0th input
722 // of the concat.
723
724 Graph graph;
725 NeonWorkloadFactory factory =
726 NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
727
728 auto workloads =
729 CreateSplitterConcatWorkloadTest<NeonSplitterWorkload, NeonConcatWorkload,
730 DataType::Float32>(factory, graph);
731
732 auto wlSplitter = std::move(workloads.first);
733 auto wlConcat = std::move(workloads.second);
734
735 //Checks that the index of inputs/outputs matches what we declared on InputDescriptor construction.
736 armnn::IAclTensorHandle* sOut0 = dynamic_cast<armnn::IAclTensorHandle*>(wlSplitter->GetData().m_Outputs[0]);
737 armnn::IAclTensorHandle* sOut1 = dynamic_cast<armnn::IAclTensorHandle*>(wlSplitter->GetData().m_Outputs[1]);
738 armnn::IAclTensorHandle* mIn0 = dynamic_cast<armnn::IAclTensorHandle*>(wlConcat->GetData().m_Inputs[0]);
739 armnn::IAclTensorHandle* mIn1 = dynamic_cast<armnn::IAclTensorHandle*>(wlConcat->GetData().m_Inputs[1]);
740
741 CHECK(sOut0);
742 CHECK(sOut1);
743 CHECK(mIn0);
744 CHECK(mIn1);
745
746 bool validDataPointers = (sOut0 == mIn1) && (sOut1 == mIn0);
747
748 CHECK(validDataPointers);
749 }
750
751 TEST_CASE("CreateSingleOutputMultipleInputs")
752 {
753 // Tests that it is possible to assign multiple (two) different layers to each of the outputs of a splitter layer.
754 // We created a splitter with two outputs. That each of those outputs is used by two different activation layers
755
756 Graph graph;
757 NeonWorkloadFactory factory =
758 NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
759
760 std::unique_ptr<NeonSplitterWorkload> wlSplitter;
761 std::unique_ptr<NeonActivationWorkload> wlActiv0_0;
762 std::unique_ptr<NeonActivationWorkload> wlActiv0_1;
763 std::unique_ptr<NeonActivationWorkload> wlActiv1_0;
764 std::unique_ptr<NeonActivationWorkload> wlActiv1_1;
765
766 CreateSplitterMultipleInputsOneOutputWorkloadTest<NeonSplitterWorkload,
767 NeonActivationWorkload, DataType::Float32>(factory, graph, wlSplitter, wlActiv0_0, wlActiv0_1,
768 wlActiv1_0, wlActiv1_1);
769
770 armnn::IAclTensorHandle* sOut0 = dynamic_cast<armnn::IAclTensorHandle*>(wlSplitter->GetData().m_Outputs[0]);
771 armnn::IAclTensorHandle* sOut1 = dynamic_cast<armnn::IAclTensorHandle*>(wlSplitter->GetData().m_Outputs[1]);
772 armnn::IAclTensorHandle* activ0_0Im = dynamic_cast<armnn::IAclTensorHandle*>(wlActiv0_0->GetData().m_Inputs[0]);
773 armnn::IAclTensorHandle* activ0_1Im = dynamic_cast<armnn::IAclTensorHandle*>(wlActiv0_1->GetData().m_Inputs[0]);
774 armnn::IAclTensorHandle* activ1_0Im = dynamic_cast<armnn::IAclTensorHandle*>(wlActiv1_0->GetData().m_Inputs[0]);
775 armnn::IAclTensorHandle* activ1_1Im = dynamic_cast<armnn::IAclTensorHandle*>(wlActiv1_1->GetData().m_Inputs[0]);
776
777
778 CHECK(sOut0);
779 CHECK(sOut1);
780 CHECK(activ0_0Im);
781 CHECK(activ0_1Im);
782 CHECK(activ1_0Im);
783 CHECK(activ1_1Im);
784
785 bool validDataPointers = (sOut0 == activ0_0Im) && (sOut0 == activ0_1Im) &&
786 (sOut1 == activ1_0Im) && (sOut1 == activ1_1Im);
787
788 CHECK(validDataPointers);
789 }
790
791 #if defined(ARMNNREF_ENABLED)
792
// This test requires the reference backend, so it is only compiled when the reference backend is built.
794
795 TEST_CASE("CreateMemCopyWorkloadsNeon")
796 {
797 NeonWorkloadFactory factory =
798 NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
799 CreateMemCopyWorkloads<IAclTensorHandle>(factory);
800 }
801
802 #endif
803
804 template <typename L2NormalizationWorkloadType, typename armnn::DataType DataType>
NeonCreateL2NormalizationWorkloadTest(DataLayout dataLayout)805 static void NeonCreateL2NormalizationWorkloadTest(DataLayout dataLayout)
806 {
807 Graph graph;
808 NeonWorkloadFactory factory =
809 NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
810
811 auto workload =
812 CreateL2NormalizationWorkloadTest<L2NormalizationWorkloadType, DataType>(factory, graph, dataLayout);
813
814 // Checks that inputs/outputs are as we expect them (see definition of CreateNormalizationWorkloadTest).
815 L2NormalizationQueueDescriptor queueDescriptor = workload->GetData();
816 auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
817 auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
818
819 TensorShape inputShape = (dataLayout == DataLayout::NCHW) ?
820 TensorShape{ 5, 20, 50, 67 } : TensorShape{ 5, 50, 67, 20 };
821 TensorShape outputShape = (dataLayout == DataLayout::NCHW) ?
822 TensorShape{ 5, 20, 50, 67 } : TensorShape{ 5, 50, 67, 20 };
823
824 CHECK(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
825 CHECK(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
826 }
827
828 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
829 TEST_CASE("CreateL2NormalizationFloat16NchwWorkload")
830 {
831 NeonCreateL2NormalizationWorkloadTest<NeonL2NormalizationFloatWorkload, DataType::Float16>(DataLayout::NCHW);
832 }
833
834 TEST_CASE("CreateL2NormalizationFloat16NhwcWorkload")
835 {
836 NeonCreateL2NormalizationWorkloadTest<NeonL2NormalizationFloatWorkload, DataType::Float16>(DataLayout::NHWC);
837 }
838 #endif
839
840 TEST_CASE("CreateL2NormalizationNchwWorkload")
841 {
842 NeonCreateL2NormalizationWorkloadTest<NeonL2NormalizationFloatWorkload, DataType::Float32>(DataLayout::NCHW);
843 }
844
845 TEST_CASE("CreateL2NormalizationNhwcWorkload")
846 {
847 NeonCreateL2NormalizationWorkloadTest<NeonL2NormalizationFloatWorkload, DataType::Float32>(DataLayout::NHWC);
848 }
849
850 template <typename LogSoftmaxWorkloadType, typename armnn::DataType DataType>
NeonCreateLogSoftmaxWorkloadTest()851 static void NeonCreateLogSoftmaxWorkloadTest()
852 {
853 Graph graph;
854 NeonWorkloadFactory factory =
855 NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
856
857 auto workload = CreateLogSoftmaxWorkloadTest<LogSoftmaxWorkloadType, DataType>(factory, graph);
858
859 // Checks that outputs and inputs are as we expect them (see definition of CreateLogSoftmaxWorkloadTest).
860 LogSoftmaxQueueDescriptor queueDescriptor = workload->GetData();
861 auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
862 auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
863 armnn::TensorInfo tensorInfo({4, 1}, DataType);
864
865 CHECK(TestNeonTensorHandleInfo(inputHandle, tensorInfo));
866 CHECK(TestNeonTensorHandleInfo(outputHandle, tensorInfo));
867 }
868
869 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
870 TEST_CASE("CreateLogSoftmaxFloat16Workload")
871 {
872 NeonCreateLogSoftmaxWorkloadTest<NeonLogSoftmaxWorkload, DataType::Float16>();
873 }
874 #endif
875
876 TEST_CASE("CreateLogSoftmaxFloatWorkload")
877 {
878 NeonCreateLogSoftmaxWorkloadTest<NeonLogSoftmaxWorkload, DataType::Float32>();
879 }
880
881 template <typename LstmWorkloadType>
NeonCreateLstmWorkloadTest()882 static void NeonCreateLstmWorkloadTest()
883 {
884 Graph graph;
885 NeonWorkloadFactory factory =
886 NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
887
888 auto workload = CreateLstmWorkloadTest<LstmWorkloadType>(factory, graph);
889
890 LstmQueueDescriptor queueDescriptor = workload->GetData();
891
892 auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
893 auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
894
895 CHECK(TestNeonTensorHandleInfo(inputHandle, TensorInfo({ 2, 2 }, DataType::Float32)));
896 CHECK(TestNeonTensorHandleInfo(outputHandle, TensorInfo({ 2, 4 }, DataType::Float32)));
897 }
898
899 TEST_CASE("CreateLSTMWorkloadFloatWorkload")
900 {
901 NeonCreateLstmWorkloadTest<NeonLstmFloatWorkload>();
902 }
903
904 template <typename ConcatWorkloadType, armnn::DataType DataType>
NeonCreateConcatWorkloadTest(std::initializer_list<unsigned int> outputShape,unsigned int concatAxis)905 static void NeonCreateConcatWorkloadTest(std::initializer_list<unsigned int> outputShape,
906 unsigned int concatAxis)
907 {
908 Graph graph;
909 NeonWorkloadFactory factory =
910 NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
911
912 auto workload = CreateConcatWorkloadTest<ConcatWorkloadType, DataType>(factory, graph, outputShape, concatAxis);
913
914 ConcatQueueDescriptor queueDescriptor = workload->GetData();
915 auto inputHandle0 = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
916 auto inputHandle1 = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
917 auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
918
919 CHECK(TestNeonTensorHandleInfo(inputHandle0, TensorInfo({ 2, 3, 2, 5 }, DataType)));
920 CHECK(TestNeonTensorHandleInfo(inputHandle1, TensorInfo({ 2, 3, 2, 5 }, DataType)));
921 CHECK(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
922 }
923
924 TEST_CASE("CreateConcatDim0Float32Workload")
925 {
926 NeonCreateConcatWorkloadTest<NeonConcatWorkload, armnn::DataType::Float32>({ 4, 3, 2, 5 }, 0);
927 }
928
929 TEST_CASE("CreateConcatDim1Float32Workload")
930 {
931 NeonCreateConcatWorkloadTest<NeonConcatWorkload, armnn::DataType::Float32>({ 2, 6, 2, 5 }, 1);
932 }
933
934 TEST_CASE("CreateConcatDim3Float32Workload")
935 {
936 NeonCreateConcatWorkloadTest<NeonConcatWorkload, armnn::DataType::Float32>({ 2, 3, 2, 10 }, 3);
937 }
938
939 TEST_CASE("CreateConcatDim0Uint8Workload")
940 {
941 NeonCreateConcatWorkloadTest<NeonConcatWorkload, armnn::DataType::QAsymmU8>({ 4, 3, 2, 5 }, 0);
942 }
943
944 TEST_CASE("CreateConcatDim1Uint8Workload")
945 {
946 NeonCreateConcatWorkloadTest<NeonConcatWorkload, armnn::DataType::QAsymmU8>({ 2, 6, 2, 5 }, 1);
947 }
948
949 TEST_CASE("CreateConcatDim3Uint8Workload")
950 {
951 NeonCreateConcatWorkloadTest<NeonConcatWorkload, armnn::DataType::QAsymmU8>({ 2, 3, 2, 10 }, 3);
952 }
953
954 template <armnn::DataType DataType>
NeonCreateStackWorkloadTest(const std::initializer_list<unsigned int> & inputShape,const std::initializer_list<unsigned int> & outputShape,unsigned int axis,unsigned int numInputs)955 static void NeonCreateStackWorkloadTest(const std::initializer_list<unsigned int>& inputShape,
956 const std::initializer_list<unsigned int>& outputShape,
957 unsigned int axis,
958 unsigned int numInputs)
959 {
960 armnn::Graph graph;
961 NeonWorkloadFactory factory =
962 NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
963
964 auto workload = CreateStackWorkloadTest<NeonStackWorkload, DataType>(factory,
965 graph,
966 TensorShape(inputShape),
967 TensorShape(outputShape),
968 axis,
969 numInputs);
970
971 // Check inputs and output are as expected
972 StackQueueDescriptor queueDescriptor = workload->GetData();
973 for (unsigned int i = 0; i < numInputs; ++i)
974 {
975 auto inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[i]);
976 CHECK(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
977 }
978 auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
979 CHECK(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
980 }
981
982 TEST_CASE("CreateStackFloat32Workload")
983 {
984 NeonCreateStackWorkloadTest<armnn::DataType::Float32>({ 3, 4, 5 }, { 3, 4, 2, 5 }, 2, 2);
985 }
986
987 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
988 TEST_CASE("CreateStackFloat16Workload")
989 {
990 NeonCreateStackWorkloadTest<armnn::DataType::Float16>({ 3, 4, 5 }, { 3, 4, 2, 5 }, 2, 2);
991 }
992 #endif
993
994 TEST_CASE("CreateStackUint8Workload")
995 {
996 NeonCreateStackWorkloadTest<armnn::DataType::QAsymmU8>({ 3, 4, 5 }, { 3, 4, 2, 5 }, 2, 2);
997 }
998
999 template <typename QuantizedLstmWorkloadType>
NeonCreateQuantizedLstmWorkloadTest()1000 static void NeonCreateQuantizedLstmWorkloadTest()
1001 {
1002 Graph graph;
1003 NeonWorkloadFactory factory = NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
1004
1005 auto workload = CreateQuantizedLstmWorkloadTest<QuantizedLstmWorkloadType>(factory, graph);
1006
1007 QuantizedLstmQueueDescriptor queueDescriptor = workload->GetData();
1008
1009 IAclTensorHandle* inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
1010 CHECK((inputHandle->GetShape() == TensorShape({2, 2})));
1011 CHECK((inputHandle->GetDataType() == arm_compute::DataType::QASYMM8));
1012
1013 IAclTensorHandle* cellStateInHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
1014 CHECK((cellStateInHandle->GetShape() == TensorShape({2, 4})));
1015 CHECK((cellStateInHandle->GetDataType() == arm_compute::DataType::QSYMM16));
1016
1017 IAclTensorHandle* outputStateInHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[2]);
1018 CHECK((outputStateInHandle->GetShape() == TensorShape({2, 4})));
1019 CHECK((outputStateInHandle->GetDataType() == arm_compute::DataType::QASYMM8));
1020
1021 IAclTensorHandle* cellStateOutHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
1022 CHECK((cellStateOutHandle->GetShape() == TensorShape({2, 4})));
1023 CHECK((cellStateOutHandle->GetDataType() == arm_compute::DataType::QSYMM16));
1024
1025 IAclTensorHandle* outputStateOutHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
1026 CHECK((outputStateOutHandle->GetShape() == TensorShape({2, 4})));
1027 CHECK((outputStateOutHandle->GetDataType() == arm_compute::DataType::QASYMM8));
1028 }
1029
1030 TEST_CASE("CreateQuantizedLstmWorkload")
1031 {
1032 NeonCreateQuantizedLstmWorkloadTest<NeonQuantizedLstmWorkload>();
1033 }
1034
1035 template <typename QLstmWorkloadType>
NeonCreateQLstmWorkloadTest()1036 static void NeonCreateQLstmWorkloadTest()
1037 {
1038 Graph graph;
1039 NeonWorkloadFactory factory = NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
1040
1041 auto workload = CreateQLstmWorkloadTest<QLstmWorkloadType>(factory, graph);
1042 QLstmQueueDescriptor queueDescriptor = workload->GetData();
1043
1044 IAclTensorHandle* inputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
1045 CHECK((inputHandle->GetShape() == TensorShape({2, 4})));
1046 CHECK((inputHandle->GetDataType() == arm_compute::DataType::QASYMM8_SIGNED));
1047
1048 IAclTensorHandle* cellStateOutHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
1049 CHECK((cellStateOutHandle->GetShape() == TensorShape({2, 4})));
1050 CHECK((cellStateOutHandle->GetDataType() == arm_compute::DataType::QSYMM16));
1051
1052 IAclTensorHandle* outputHandle = PolymorphicDowncast<IAclTensorHandle*>(queueDescriptor.m_Outputs[2]);
1053 CHECK((outputHandle->GetShape() == TensorShape({2, 4})));
1054 CHECK((outputHandle->GetDataType() == arm_compute::DataType::QASYMM8_SIGNED));
1055 }
1056
1057 TEST_CASE("CreateQLstmWorkloadTest")
1058 {
1059 NeonCreateQLstmWorkloadTest<NeonQLstmWorkload>();
1060 }
1061
1062 template <armnn::DataType DataType>
NeonCreateActivationWorkloadReplaceFunctionsTest()1063 static void NeonCreateActivationWorkloadReplaceFunctionsTest()
1064 {
1065 shared_ptr<NeonMemoryManager> memoryManager = make_shared<NeonMemoryManager>();
1066
1067 Graph graph;
1068 NeonWorkloadFactory factory = NeonWorkloadFactoryHelper::GetFactory(memoryManager);
1069 // input and output are created as armnn::TensorInfo tensorInfo({1, 1}, DataType)
1070 auto workloadPtr = CreateActivationWorkloadTest<NeonActivationWorkload, DataType>(factory, graph);
1071
1072 // new input and output tensor handlers are created and then replace in the workload
1073 const NeonTensorHandleFactory tensorHandleFactory(memoryManager);
1074 TensorInfo inputInfo({2 , 2}, DataType::Float16);
1075 TensorInfo outputInfo({2 , 2}, DataType::Float16);
1076 unique_ptr<ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputInfo);
1077 inputHandle->Allocate();
1078 unique_ptr<ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputInfo);
1079 outputHandle->Allocate();
1080
1081 unsigned int slot = 0;
1082 CHECK_THROWS_AS(workloadPtr->ReplaceInputTensorHandle(inputHandle.get(), slot), UnimplementedException);
1083 CHECK_THROWS_AS(workloadPtr->ReplaceOutputTensorHandle(outputHandle.get(), slot), UnimplementedException);
1084 }
1085
1086 TEST_CASE("NeonReplaceFunctionsfromFloat32toFloat16ActivationWorkload")
1087 {
1088 NeonCreateActivationWorkloadReplaceFunctionsTest<armnn::DataType::Float32>();
1089 }
1090
1091 TEST_CASE("NeonReplaceFunctionsfromUint8toFloat16ActivationWorkload")
1092 {
1093 NeonCreateActivationWorkloadReplaceFunctionsTest<armnn::DataType::QAsymmU8>();
1094 }
1095
1096 }
1097