//
// Copyright © 2017,2021-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once

#include "TestUtils.hpp"

#include <Graph.hpp>
#include <Network.hpp>
#include <ResolveType.hpp>

#include <armnnUtils/DataLayoutIndexed.hpp>
#include <armnn/backends/TensorHandle.hpp>
#include <armnn/backends/WorkloadData.hpp>
#include <armnn/backends/WorkloadFactory.hpp>
#include <armnn/utility/Assert.hpp>
#include <armnn/utility/IgnoreUnused.hpp>
#include <armnn/utility/PolymorphicDowncast.hpp>

#include <doctest/doctest.h>

#include <utility>

using namespace armnn;

namespace
{

using namespace std;

// Calls CreateWorkload for a layer, and checks the returned pointer is of the correct type.
template<typename Workload>
std::unique_ptr<Workload> MakeAndCheckWorkload(Layer& layer,
                                               const IWorkloadFactory& factory,
                                               const ModelOptions& modelOptions = {})
{
    std::unique_ptr<IWorkload> workload = layer.CreateWorkload(factory);
    CHECK_MESSAGE(workload.get() == PolymorphicDowncast<Workload*>(workload.get()),
                  "Cannot convert to derived class");
    std::string reasonIfUnsupported;
    layer.SetBackendId(factory.GetBackendId());
    CHECK(factory.IsLayerSupported(layer, layer.GetDataType(), reasonIfUnsupported, modelOptions));
    return std::unique_ptr<Workload>(static_cast<Workload*>(workload.release()));
}
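
// Illustrative use of MakeAndCheckWorkload (a sketch only; RefActivationWorkload
// is a reference-backend workload type assumed here, not defined in this header):
//
//     auto workload = MakeAndCheckWorkload<RefActivationWorkload>(*layer, factory);
//     // workload is a std::unique_ptr<RefActivationWorkload>; its queue
//     // descriptor can then be inspected via workload->GetData().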

// Helper function to create tensor handles for workloads, assuming they all use the same factory.
void CreateTensorHandles(armnn::Graph& graph,
                         armnn::IWorkloadFactory& factory)
{
    TensorHandleFactoryRegistry tmpRegistry;
    for (auto&& layer : graph.TopologicalSort())
    {
        layer->CreateTensorHandles(tmpRegistry, factory);
    }
}

/////////////////////////////////////////////////////////////////////////////////////////////
// The following functions are called by backendsCommon/test/CreateWorkload*.cpp
// They build very simple graphs, and then create a workload.
// Some checks are performed on the workload to ensure parameters have been passed correctly.
// They return the created workloads so that backend-specific checks can be performed.
/////////////////////////////////////////////////////////////////////////////////////////////
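
// For illustration, a backend test typically drives one of these helpers as
// follows (a minimal sketch; RefWorkloadFactory and RefActivationWorkload are
// reference-backend types assumed here):
//
//     armnn::Graph graph;
//     RefWorkloadFactory factory;
//     auto workload = CreateActivationWorkloadTest<RefActivationWorkload,
//                                                  armnn::DataType::Float32>(factory, graph);
//     // ... extra, backend-specific checks on the returned workload ...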

template <typename ActivationWorkload, armnn::DataType DataType>
std::unique_ptr<ActivationWorkload> CreateActivationWorkloadTest(armnn::IWorkloadFactory& factory,
                                                                 armnn::Graph&            graph)
{
    // Creates the layer we're testing.
    ActivationDescriptor layerDesc;
    layerDesc.m_Function = ActivationFunction::ReLu;
    layerDesc.m_A        = 3.5f;
    layerDesc.m_B        = -10.0f;

    ActivationLayer* const layer = graph.AddLayer<ActivationLayer>(layerDesc, "layer");

    // Creates extra layers.
    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");

    // Connects up.
    armnn::TensorInfo tensorInfo({1, 1}, DataType);

    Connect(input, layer, tensorInfo);
    Connect(layer, output, tensorInfo);

    CreateTensorHandles(graph, factory);

    // Makes the workload and checks it.
    auto workload = MakeAndCheckWorkload<ActivationWorkload>(*layer, factory);

    ActivationQueueDescriptor queueDescriptor = workload->GetData();
    CHECK(queueDescriptor.m_Inputs.size() == 1);
    CHECK(queueDescriptor.m_Outputs.size() == 1);
    CHECK(queueDescriptor.m_Parameters.m_A == 3.5f);
    CHECK(queueDescriptor.m_Parameters.m_B == -10.0f);
    CHECK((queueDescriptor.m_Parameters.m_Function == ActivationFunction::ReLu));

    // Returns so we can do extra, backend-specific tests.
    return workload;
}

template <typename WorkloadType,
          typename DescriptorType,
          typename LayerType,
          armnn::DataType DataType>
std::unique_ptr<WorkloadType> CreateElementwiseWorkloadTest(armnn::IWorkloadFactory& factory,
                                                            armnn::Graph& graph)
{
    // Creates the layer we're testing.
    Layer* const layer = graph.AddLayer<LayerType>("layer");

    // Creates extra layers.
    Layer* const input1 = graph.AddLayer<InputLayer>(1, "input1");
    Layer* const input2 = graph.AddLayer<InputLayer>(2, "input2");
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");

    // Connects up.
    armnn::TensorInfo tensorInfo({2, 3}, DataType);
    Connect(input1, layer, tensorInfo, 0, 0);
    Connect(input2, layer, tensorInfo, 0, 1);
    Connect(layer, output, tensorInfo);
    CreateTensorHandles(graph, factory);

    // Makes the workload and checks it.
    auto workload = MakeAndCheckWorkload<WorkloadType>(*layer, factory);

    auto queueDescriptor = workload->GetData();
    CHECK(queueDescriptor.m_Inputs.size() == 2);
    CHECK(queueDescriptor.m_Outputs.size() == 1);

    // Returns so we can do extra, backend-specific tests.
    return workload;
}

template <typename WorkloadType, armnn::DataType DataType>
std::unique_ptr<WorkloadType> CreateElementwiseBinaryWorkloadTest(armnn::IWorkloadFactory& factory,
                                                                  armnn::Graph& graph,
                                                                  armnn::BinaryOperation binaryOperation)
{
    // Creates the layer we're testing.
    ElementwiseBinaryDescriptor descriptor(binaryOperation);

    Layer* const layer = graph.AddLayer<ElementwiseBinaryLayer>(descriptor, "layer");

    // Creates extra layers.
    Layer* const input1 = graph.AddLayer<InputLayer>(1, "input1");
    Layer* const input2 = graph.AddLayer<InputLayer>(2, "input2");
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");

    // Connects up.
    armnn::TensorInfo tensorInfo({2, 3}, DataType);
    Connect(input1, layer, tensorInfo, 0, 0);
    Connect(input2, layer, tensorInfo, 0, 1);
    Connect(layer, output, tensorInfo);
    CreateTensorHandles(graph, factory);

    // Makes the workload and checks it.
    auto workload = MakeAndCheckWorkload<WorkloadType>(*layer, factory);

    auto queueDescriptor = workload->GetData();
    CHECK(queueDescriptor.m_Inputs.size() == 2);
    CHECK(queueDescriptor.m_Outputs.size() == 1);

    // Returns so we can do extra, backend-specific tests.
    return workload;
}

template<typename WorkloadType,
         typename DescriptorType,
         armnn::DataType DataType>
std::unique_ptr<WorkloadType> CreateSubtractionWithBlobWorkloadTest(armnn::IWorkloadFactory& factory,
                                                                    armnn::Graph& graph)
{
    // Creates the layer we're testing.
    SubtractionLayer* const layer = graph.AddLayer<SubtractionLayer>("layer");

    auto activationDesc = std::make_shared<ActivationDescriptor>();
    activationDesc->m_A        = 10.0f;
    activationDesc->m_B        = 5.0f;
    activationDesc->m_Function = armnn::ActivationFunction::BoundedReLu;

    layer->SetAdditionalInfoForObject(activationDesc);

    // Creates extra layers.
    Layer* const input1 = graph.AddLayer<InputLayer>(1, "input1");
    Layer* const input2 = graph.AddLayer<InputLayer>(2, "input2");
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");

    // Connects up.
    armnn::TensorInfo tensorInfo({2, 3}, DataType);
    Connect(input1, layer, tensorInfo, 0, 0);
    Connect(input2, layer, tensorInfo, 0, 1);
    Connect(layer, output, tensorInfo);
    CreateTensorHandles(graph, factory);

    // Check that the additional information can be queried from the layer.
    std::shared_ptr<ActivationDescriptor>
        activationDescPtr = layer->GetAdditionalInformation<ActivationDescriptor>();

    ARMNN_ASSERT(static_cast<float>(activationDescPtr->m_A) == 10.0f);
    ARMNN_ASSERT(static_cast<float>(activationDescPtr->m_B) == 5.0f);
    ARMNN_ASSERT(
        static_cast<ActivationFunction>(activationDescPtr->m_Function) == armnn::ActivationFunction::BoundedReLu
    );

    // Makes the workload and checks it.
    auto workload = MakeAndCheckWorkload<WorkloadType>(*layer, factory);

    DescriptorType queueDescriptor = workload->GetData();

    const ActivationDescriptor* queueDescBlobPtr =
        queueDescriptor.template GetAdditionalInformation<ActivationDescriptor>();
    IgnoreUnused(queueDescBlobPtr);
    ARMNN_ASSERT(static_cast<float>(queueDescBlobPtr->m_A) == 10.0f);
    ARMNN_ASSERT(static_cast<float>(queueDescBlobPtr->m_B) == 5.0f);
    ARMNN_ASSERT(
        static_cast<ActivationFunction>(queueDescBlobPtr->m_Function) == armnn::ActivationFunction::BoundedReLu
    );

    CHECK(queueDescriptor.m_Inputs.size() == 2);
    CHECK(queueDescriptor.m_Outputs.size() == 1);

    return workload;
}
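
// A backend test might exercise this fused-activation ("blob") path as follows
// (a sketch; NeonSubtractionWorkload is a Neon-backend workload type assumed here):
//
//     auto workload = CreateSubtractionWithBlobWorkloadTest<NeonSubtractionWorkload,
//                                                           SubtractionQueueDescriptor,
//                                                           armnn::DataType::Float32>(factory, graph);
//     // The queue descriptor's additional-information blob carries the
//     // ActivationDescriptor, letting the backend fuse the bounded ReLU.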

template<typename WorkloadType,
         typename DescriptorType,
         armnn::DataType DataType>
std::unique_ptr<WorkloadType> CreateMultiplicationWithBlobWorkloadTest(armnn::IWorkloadFactory& factory,
                                                                       armnn::Graph& graph)
{
    // Creates the layer we're testing.
    MultiplicationLayer* const layer = graph.AddLayer<MultiplicationLayer>("layer");

    auto activationDesc = std::make_shared<ActivationDescriptor>();
    activationDesc->m_A        = 10.0f;
    activationDesc->m_B        = 5.0f;
    activationDesc->m_Function = armnn::ActivationFunction::BoundedReLu;

    layer->SetAdditionalInfoForObject(activationDesc);

    // Creates extra layers.
    Layer* const input1 = graph.AddLayer<InputLayer>(1, "input1");
    Layer* const input2 = graph.AddLayer<InputLayer>(2, "input2");
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");

    // Connects up.
    armnn::TensorInfo tensorInfo({2, 3}, DataType);
    Connect(input1, layer, tensorInfo, 0, 0);
    Connect(input2, layer, tensorInfo, 0, 1);
    Connect(layer, output, tensorInfo);
    CreateTensorHandles(graph, factory);

    // Check that the additional information can be queried from the layer.
    std::shared_ptr<ActivationDescriptor>
        activationDescPtr = layer->GetAdditionalInformation<ActivationDescriptor>();

    ARMNN_ASSERT(static_cast<float>(activationDescPtr->m_A) == 10.0f);
    ARMNN_ASSERT(static_cast<float>(activationDescPtr->m_B) == 5.0f);
    ARMNN_ASSERT(
        static_cast<ActivationFunction>(activationDescPtr->m_Function) == armnn::ActivationFunction::BoundedReLu
    );

    // Makes the workload and checks it.
    auto workload = MakeAndCheckWorkload<WorkloadType>(*layer, factory);

    DescriptorType queueDescriptor = workload->GetData();
    CHECK(queueDescriptor.m_Inputs.size() == 2);
    CHECK(queueDescriptor.m_Outputs.size() == 1);
    const ActivationDescriptor* queueDescBlobPtr =
        queueDescriptor.template GetAdditionalInformation<ActivationDescriptor>();
    IgnoreUnused(queueDescBlobPtr);
    ARMNN_ASSERT(static_cast<float>(queueDescBlobPtr->m_A) == 10.0f);
    ARMNN_ASSERT(static_cast<float>(queueDescBlobPtr->m_B) == 5.0f);
    ARMNN_ASSERT(
        static_cast<ActivationFunction>(queueDescBlobPtr->m_Function) == armnn::ActivationFunction::BoundedReLu
    );

    // Returns so we can do extra, backend-specific tests.
    return workload;
}

template<typename WorkloadType,
         typename DescriptorType,
         armnn::DataType DataType>
std::unique_ptr<WorkloadType> CreateAdditionWithBlobWorkloadTest(armnn::IWorkloadFactory& factory,
                                                                 armnn::Graph& graph)
{
    // Creates the layer we're testing.
    AdditionLayer* const layer = graph.AddLayer<AdditionLayer>("layer");

    auto activationDesc = std::make_shared<ActivationDescriptor>();
    activationDesc->m_A        = 10.0f;
    activationDesc->m_B        = 5.0f;
    activationDesc->m_Function = armnn::ActivationFunction::BoundedReLu;

    layer->SetAdditionalInfoForObject(activationDesc);

    // Creates extra layers.
    Layer* const input1 = graph.AddLayer<InputLayer>(1, "input1");
    Layer* const input2 = graph.AddLayer<InputLayer>(2, "input2");
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");

    // Connects up.
    armnn::TensorInfo tensorInfo({2, 3}, DataType);
    Connect(input1, layer, tensorInfo, 0, 0);
    Connect(input2, layer, tensorInfo, 0, 1);
    Connect(layer, output, tensorInfo);
    CreateTensorHandles(graph, factory);

    // Check that the additional information can be queried from the layer.
    std::shared_ptr<ActivationDescriptor>
        activationDescPtr = layer->template GetAdditionalInformation<ActivationDescriptor>();

    ARMNN_ASSERT(static_cast<float>(activationDescPtr->m_A) == 10.0f);
    ARMNN_ASSERT(static_cast<float>(activationDescPtr->m_B) == 5.0f);
    ARMNN_ASSERT(
        static_cast<ActivationFunction>(activationDescPtr->m_Function) == armnn::ActivationFunction::BoundedReLu
    );

    // Makes the workload and checks it.
    auto workload = MakeAndCheckWorkload<WorkloadType>(*layer, factory);

    DescriptorType queueDescriptor = workload->GetData();
    const ActivationDescriptor* queueDescBlobPtr =
        queueDescriptor.template GetAdditionalInformation<ActivationDescriptor>();
    IgnoreUnused(queueDescBlobPtr);
    CHECK(queueDescriptor.m_Inputs.size() == 2);
    CHECK(queueDescriptor.m_Outputs.size() == 1);
    ARMNN_ASSERT(static_cast<float>(queueDescBlobPtr->m_A) == 10.0f);
    ARMNN_ASSERT(static_cast<float>(queueDescBlobPtr->m_B) == 5.0f);
    ARMNN_ASSERT(
        static_cast<ActivationFunction>(queueDescBlobPtr->m_Function) == armnn::ActivationFunction::BoundedReLu
    );

    return workload;
}

template <typename WorkloadType,
          typename DescriptorType,
          armnn::DataType DataType>
std::unique_ptr<WorkloadType> CreateElementwiseUnaryWorkloadTest(armnn::IWorkloadFactory& factory,
                                                                 armnn::Graph& graph,
                                                                 armnn::UnaryOperation op)
{
    ElementwiseUnaryDescriptor desc(op);
    Layer* const layer = graph.AddLayer<armnn::ElementwiseUnaryLayer>(desc, "layer");

    Layer* const input  = graph.AddLayer<InputLayer>(0, "input");
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");

    armnn::TensorInfo tensorInfo({ 2, 3 }, DataType);
    Connect(input, layer, tensorInfo, 0, 0);
    Connect(layer, output, tensorInfo, 0, 0);
    CreateTensorHandles(graph, factory);

    auto workload = MakeAndCheckWorkload<WorkloadType>(*layer, factory);
    DescriptorType queueDescriptor = workload->GetData();

    CHECK(queueDescriptor.m_Inputs.size()  == 1);
    CHECK(queueDescriptor.m_Outputs.size() == 1);

    return workload;
}

template <typename BatchNormalizationWorkloadType, armnn::DataType DataType>
std::unique_ptr<BatchNormalizationWorkloadType> CreateBatchNormalizationWorkloadTest(
    armnn::IWorkloadFactory& factory, armnn::Graph& graph, DataLayout dataLayout = DataLayout::NCHW)
{
    TensorShape tensorShape;
    switch (dataLayout)
    {
        case DataLayout::NHWC:
            tensorShape = { 2, 4, 4, 3 };
            break;
        case DataLayout::NCHW:
        default:
            tensorShape = { 2, 3, 4, 4 };
    }

    // Creates the layer we're testing.
    BatchNormalizationDescriptor layerDesc;
    layerDesc.m_Eps = 0.05f;
    layerDesc.m_DataLayout = dataLayout;

    BatchNormalizationLayer* const layer = graph.AddLayer<BatchNormalizationLayer>(layerDesc, "layer");

    armnn::TensorInfo weightInfo({3}, DataType);
    layer->m_Mean     = std::make_unique<ScopedTensorHandle>(weightInfo);
    layer->m_Variance = std::make_unique<ScopedTensorHandle>(weightInfo);
    layer->m_Beta     = std::make_unique<ScopedTensorHandle>(weightInfo);
    layer->m_Gamma    = std::make_unique<ScopedTensorHandle>(weightInfo);
    layer->m_Mean->Allocate();
    layer->m_Variance->Allocate();
    layer->m_Beta->Allocate();
    layer->m_Gamma->Allocate();

    // Creates extra layers.
    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");

    // Connects up.
    armnn::TensorInfo tensorInfo(tensorShape, DataType);
    Connect(input, layer, tensorInfo);
    Connect(layer, output, tensorInfo);
    CreateTensorHandles(graph, factory);

    // Makes the workload and checks it.
    auto workload = MakeAndCheckWorkload<BatchNormalizationWorkloadType>(*layer, factory);
    BatchNormalizationQueueDescriptor queueDescriptor = workload->GetData();
    CHECK(queueDescriptor.m_Parameters.m_Eps == 0.05f);
    CHECK(queueDescriptor.m_Inputs.size() == 1);
    CHECK(queueDescriptor.m_Outputs.size() == 1);
    CHECK((queueDescriptor.m_Mean->GetTensorInfo() == TensorInfo({3}, DataType)));
    CHECK((queueDescriptor.m_Variance->GetTensorInfo() == TensorInfo({3}, DataType)));
    CHECK((queueDescriptor.m_Gamma->GetTensorInfo() == TensorInfo({3}, DataType)));
    CHECK((queueDescriptor.m_Beta->GetTensorInfo() == TensorInfo({3}, DataType)));
    CHECK((queueDescriptor.m_Parameters.m_DataLayout == dataLayout));

    // Returns so we can do extra, backend-specific tests.
    return workload;
}

template <typename BatchNormalizationWorkloadType, armnn::DataType DataType>
std::unique_ptr<BatchNormalizationWorkloadType> CreateBatchNormalizationWithBlobWorkloadTest(
    armnn::IWorkloadFactory& factory, armnn::Graph& graph, DataLayout dataLayout = DataLayout::NCHW)
{
    TensorShape tensorShape;
    switch (dataLayout)
    {
        case DataLayout::NHWC:
            tensorShape = { 2, 4, 4, 3 };
            break;
        case DataLayout::NCHW:
        default:
            tensorShape = { 2, 3, 4, 4 };
    }

    // Creates the layer we're testing.
    BatchNormalizationDescriptor layerDesc;
    layerDesc.m_Eps = 0.05f;
    layerDesc.m_DataLayout = dataLayout;

    BatchNormalizationLayer* const layer = graph.AddLayer<BatchNormalizationLayer>(layerDesc, "layer");

    armnn::TensorInfo weightInfo({3}, DataType);
    layer->m_Mean     = std::make_unique<ScopedTensorHandle>(weightInfo);
    layer->m_Variance = std::make_unique<ScopedTensorHandle>(weightInfo);
    layer->m_Beta     = std::make_unique<ScopedTensorHandle>(weightInfo);
    layer->m_Gamma    = std::make_unique<ScopedTensorHandle>(weightInfo);
    layer->m_Mean->Allocate();
    layer->m_Variance->Allocate();
    layer->m_Beta->Allocate();
    layer->m_Gamma->Allocate();

    auto activationDesc = std::make_shared<ActivationDescriptor>();
    activationDesc->m_A        = 10.0f;
    activationDesc->m_B        = 5.0f;
    activationDesc->m_Function = armnn::ActivationFunction::BoundedReLu;

    layer->SetAdditionalInfoForObject(activationDesc);

    // Check that the additional information can be queried from the layer.
    std::shared_ptr<ActivationDescriptor> activationDescPtr = layer->GetAdditionalInformation<ActivationDescriptor>();
    ARMNN_ASSERT(static_cast<float>(activationDescPtr->m_A) == 10.0f);
    ARMNN_ASSERT(static_cast<float>(activationDescPtr->m_B) == 5.0f);
    ARMNN_ASSERT(
        static_cast<ActivationFunction>(activationDescPtr->m_Function) == armnn::ActivationFunction::BoundedReLu
    );

    // Creates extra layers.
    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");

    // Connects up.
    armnn::TensorInfo tensorInfo(tensorShape, DataType);
    Connect(input, layer, tensorInfo);
    Connect(layer, output, tensorInfo);
    CreateTensorHandles(graph, factory);

    // Makes the workload and checks it.
    auto workload = MakeAndCheckWorkload<BatchNormalizationWorkloadType>(*layer, factory);
    BatchNormalizationQueueDescriptor queueDescriptor = workload->GetData();
    const ActivationDescriptor* queueDescBlobPtr = queueDescriptor.GetAdditionalInformation<ActivationDescriptor>();
    IgnoreUnused(queueDescBlobPtr);
    ARMNN_ASSERT(static_cast<float>(queueDescBlobPtr->m_A) == 10.0f);
    ARMNN_ASSERT(static_cast<float>(queueDescBlobPtr->m_B) == 5.0f);
    ARMNN_ASSERT(
        static_cast<ActivationFunction>(queueDescBlobPtr->m_Function) == armnn::ActivationFunction::BoundedReLu
    );

    CHECK(queueDescriptor.m_Parameters.m_Eps == 0.05f);
    CHECK(queueDescriptor.m_Inputs.size() == 1);
    CHECK(queueDescriptor.m_Outputs.size() == 1);
    CHECK((queueDescriptor.m_Mean->GetTensorInfo() == TensorInfo({3}, DataType)));
    CHECK((queueDescriptor.m_Variance->GetTensorInfo() == TensorInfo({3}, DataType)));
    CHECK((queueDescriptor.m_Gamma->GetTensorInfo() == TensorInfo({3}, DataType)));
    CHECK((queueDescriptor.m_Beta->GetTensorInfo() == TensorInfo({3}, DataType)));
    CHECK((queueDescriptor.m_Parameters.m_DataLayout == dataLayout));

    // Returns so we can do extra, backend-specific tests.
    return workload;
}

template <typename Convolution2dWorkload, armnn::DataType DataType>
std::unique_ptr<Convolution2dWorkload> CreateConvolution2dWorkloadTest(armnn::IWorkloadFactory& factory,
                                                                       armnn::Graph&            graph,
                                                                       DataLayout dataLayout = DataLayout::NCHW,
                                                                       const ModelOptions& modelOptions = {})
{
    // Creates the layer we're testing.
    Convolution2dDescriptor layerDesc;
    layerDesc.m_PadLeft = 3;
    layerDesc.m_PadRight = 3;
    layerDesc.m_PadTop = 1;
    layerDesc.m_PadBottom = 1;
    layerDesc.m_StrideX = 2;
    layerDesc.m_StrideY = 4;
    layerDesc.m_BiasEnabled = false;
    layerDesc.m_DataLayout = dataLayout;

    float inputsQScale = 1.0f;
    float outputQScale = DataType == armnn::DataType::QAsymmU8 ? 2.0f : 1.0f;

    Convolution2dLayer* const layer = graph.AddLayer<Convolution2dLayer>(layerDesc, "layer");

    TensorShape weightShape = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 3, 5, 3} : TensorShape{2, 5, 3, 3};
    TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 3, 8, 16} : TensorShape{2, 8, 16, 3};
    TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 2, 2, 10} : TensorShape{2, 2, 10, 2};

    armnn::TensorInfo weightsTensorInfo(weightShape, DataType, inputsQScale);
    weightsTensorInfo.SetConstant();

    // Creates extra layers.
    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
    auto const weights = graph.AddLayer<ConstantLayer>("weights");
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");

    weights->m_LayerOutput = std::make_unique<ScopedTensorHandle>(weightsTensorInfo);
    weights->m_LayerOutput->Allocate();

    // Connects up.
    Connect(input, layer, TensorInfo(inputShape, DataType, inputsQScale));
    Connect(weights, layer, weightsTensorInfo, 0, 1);
    Connect(layer, output, TensorInfo(outputShape, DataType, outputQScale));
    CreateTensorHandles(graph, factory);

    // Makes the workload and checks it.
    auto workload = MakeAndCheckWorkload<Convolution2dWorkload>(*layer, factory, modelOptions);

    Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
    CHECK(queueDescriptor.m_Parameters.m_StrideX == 2);
    CHECK(queueDescriptor.m_Parameters.m_StrideY == 4);
    CHECK(queueDescriptor.m_Parameters.m_PadLeft == 3);
    CHECK(queueDescriptor.m_Parameters.m_PadRight == 3);
    CHECK(queueDescriptor.m_Parameters.m_PadTop == 1);
    CHECK(queueDescriptor.m_Parameters.m_PadBottom == 1);
    CHECK(!queueDescriptor.m_Parameters.m_BiasEnabled);
    CHECK((queueDescriptor.m_Parameters.m_DataLayout == dataLayout));

    CHECK(queueDescriptor.m_Inputs.size() == 2);
    CHECK(queueDescriptor.m_Outputs.size() == 1);

    // Returns so we can do extra, backend-specific tests.
    return workload;
}

template<typename Convolution2dWorkload, armnn::DataType DataType>
std::unique_ptr<Convolution2dWorkload> CreateConvolution2dFusedActivationWithBlobWorkloadTest(
    armnn::IWorkloadFactory& factory,
    armnn::Graph& graph,
    DataLayout dataLayout = DataLayout::NCHW,
    const ModelOptions& modelOptions = {})
{
    // Creates the layer we're testing.
    Convolution2dDescriptor layerDesc;
    layerDesc.m_PadLeft = 3;
    layerDesc.m_PadRight = 3;
    layerDesc.m_PadTop = 1;
    layerDesc.m_PadBottom = 1;
    layerDesc.m_StrideX = 2;
    layerDesc.m_StrideY = 4;
    layerDesc.m_BiasEnabled = true;
    layerDesc.m_DataLayout = dataLayout;

    float inputsQScale = 1.0f;
    float outputQScale = DataType == armnn::DataType::QAsymmU8 ? 2.0f : 1.0f;

    Convolution2dLayer* const layer = graph.AddLayer<Convolution2dLayer>(layerDesc, "layer");

    TensorShape weightShape = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 3, 5, 3} : TensorShape{2, 5, 3, 3};
    TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 3, 8, 16} : TensorShape{2, 8, 16, 3};
    TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 2, 2, 10} : TensorShape{2, 2, 10, 2};

    armnn::TensorInfo weightsTensorInfo(weightShape, DataType, inputsQScale);
    weightsTensorInfo.SetConstant();
    armnn::TensorInfo biasTensorInfo({2}, DataType, inputsQScale);
    biasTensorInfo.SetConstant();

    auto activationDesc = std::make_shared<ActivationDescriptor>();
    activationDesc->m_A        = 10.0f;
    activationDesc->m_B        = 5.0f;
    activationDesc->m_Function = armnn::ActivationFunction::BoundedReLu;

    layer->SetAdditionalInfoForObject(activationDesc);

    // Check that the additional information can be queried from the layer.
    std::shared_ptr<ActivationDescriptor> activationDescPtr = layer->GetAdditionalInformation<ActivationDescriptor>();

    ARMNN_ASSERT(static_cast<float>(activationDescPtr->m_A) == 10.0f);
    ARMNN_ASSERT(static_cast<float>(activationDescPtr->m_B) == 5.0f);
    ARMNN_ASSERT(
        static_cast<ActivationFunction>(activationDescPtr->m_Function) == armnn::ActivationFunction::BoundedReLu
    );

    // Creates extra layers.
    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
    auto const weights = graph.AddLayer<ConstantLayer>("weights");
    auto const bias = graph.AddLayer<ConstantLayer>("bias");
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");

    weights->m_LayerOutput = std::make_unique<ScopedTensorHandle>(weightsTensorInfo);
    weights->m_LayerOutput->Allocate();
    bias->m_LayerOutput = std::make_unique<ScopedTensorHandle>(biasTensorInfo);
    bias->m_LayerOutput->Allocate();

    // Connects up.
    Connect(input, layer, TensorInfo(inputShape, DataType, inputsQScale));
    Connect(weights, layer, weightsTensorInfo, 0, 1);
    Connect(bias, layer, biasTensorInfo, 0, 2);
    Connect(layer, output, TensorInfo(outputShape, DataType, outputQScale));
    CreateTensorHandles(graph, factory);

    // Makes the workload and checks it.
    auto workload = MakeAndCheckWorkload<Convolution2dWorkload>(*layer, factory, modelOptions);

    Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
    const ActivationDescriptor* queueDescBlobPtr = queueDescriptor.GetAdditionalInformation<ActivationDescriptor>();
    IgnoreUnused(queueDescBlobPtr);
    ARMNN_ASSERT(static_cast<float>(queueDescBlobPtr->m_A) == 10.0f);
    ARMNN_ASSERT(static_cast<float>(queueDescBlobPtr->m_B) == 5.0f);
    ARMNN_ASSERT(
        static_cast<ActivationFunction>(queueDescBlobPtr->m_Function) == armnn::ActivationFunction::BoundedReLu
    );

    CHECK(queueDescriptor.m_Parameters.m_StrideX == 2);
    CHECK(queueDescriptor.m_Parameters.m_StrideY == 4);
    CHECK(queueDescriptor.m_Parameters.m_PadLeft == 3);
    CHECK(queueDescriptor.m_Parameters.m_PadRight == 3);
    CHECK(queueDescriptor.m_Parameters.m_PadTop == 1);
    CHECK(queueDescriptor.m_Parameters.m_PadBottom == 1);
    CHECK(queueDescriptor.m_Parameters.m_BiasEnabled);
    CHECK((queueDescriptor.m_Parameters.m_DataLayout == dataLayout));

    CHECK(queueDescriptor.m_Outputs.size() == 1);
    CHECK(queueDescriptor.m_Inputs.size() == 3);

    // Returns so we can do extra, backend-specific tests.
    return workload;
}

template <typename Convolution2dWorkload, armnn::DataType DataType>
std::unique_ptr<Convolution2dWorkload> CreateConvolution2dWorkloadFastMathTest(armnn::IWorkloadFactory& factory,
                                                                               armnn::Graph&            graph,
                                                                               DataLayout dataLayout = DataLayout::NCHW,
                                                                               const ModelOptions& modelOptions = {})
{
    // Creates the layer we're testing.
    Convolution2dDescriptor layerDesc;
    layerDesc.m_PadLeft = 0;
    layerDesc.m_PadRight = 0;
    layerDesc.m_PadTop = 0;
    layerDesc.m_PadBottom = 0;
    layerDesc.m_StrideX = 1;
    layerDesc.m_StrideY = 1;
    layerDesc.m_BiasEnabled = true;
    layerDesc.m_DataLayout = dataLayout;

    float inputsQScale = 1.0f;
    float outputQScale = DataType == armnn::DataType::QAsymmU8 ? 2.0f : 1.0f;

    Convolution2dLayer* const layer = graph.AddLayer<Convolution2dLayer>(layerDesc, "layer");

    TensorShape weightShape = TensorShape{ 32, 32, 3, 3 };
    TensorShape biasShape = TensorShape{ 32 };
    TensorShape inputShape = TensorShape{ 1, 32, 149, 149 };
    TensorShape outputShape = TensorShape{ 1, 32, 147, 147 };

    armnn::TensorInfo weightsTensorInfo(weightShape, DataType, inputsQScale);
    weightsTensorInfo.SetConstant();
    armnn::TensorInfo biasTensorInfo(biasShape, DataType, inputsQScale);
    biasTensorInfo.SetConstant();

    // Creates extra layers.
    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
    auto const weights = graph.AddLayer<ConstantLayer>("weights");
    auto const bias = graph.AddLayer<ConstantLayer>("bias");
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");

    // Connects up.
    Connect(input, layer, TensorInfo(inputShape, DataType));
    Connect(weights, layer, weightsTensorInfo, 0, 1);
    Connect(bias, layer, biasTensorInfo, 0, 2);
    Connect(layer, output, TensorInfo(outputShape, DataType, outputQScale));
    CreateTensorHandles(graph, factory);

    // Makes the workload and checks it.
    auto workload = MakeAndCheckWorkload<Convolution2dWorkload>(*layer, factory, modelOptions);

    Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
    CHECK(queueDescriptor.m_Parameters.m_StrideX == 1);
    CHECK(queueDescriptor.m_Parameters.m_StrideY == 1);
    CHECK(queueDescriptor.m_Parameters.m_PadLeft == 0);
    CHECK(queueDescriptor.m_Parameters.m_PadRight == 0);
    CHECK(queueDescriptor.m_Parameters.m_PadTop == 0);
    CHECK(queueDescriptor.m_Parameters.m_PadBottom == 0);
    CHECK((queueDescriptor.m_Parameters.m_DataLayout == dataLayout));

    CHECK(queueDescriptor.m_Inputs.size() == 3);
    CHECK(queueDescriptor.m_Outputs.size() == 1);

    // Returns so we can do extra, backend-specific tests.
    return workload;
}

template <typename LstmWorkload>
std::unique_ptr<LstmWorkload> CreateLstmWorkloadTest(armnn::IWorkloadFactory& factory, armnn::Graph& graph)
{
    // This parameter setting is for withCifgWithPeepholeNoProjection.
    LstmDescriptor layerDesc;
    layerDesc.m_ActivationFunc = 4;
    layerDesc.m_ClippingThresCell = 0.0f;
    layerDesc.m_ClippingThresProj = 0.0f;
    layerDesc.m_CifgEnabled = true;
    layerDesc.m_PeepholeEnabled = true;
    layerDesc.m_ProjectionEnabled = false;

    LstmLayer* const layer = graph.AddLayer<LstmLayer>(layerDesc, "layer");
    unsigned int batchSize = 2;
    unsigned int inputSize = 2;
    unsigned int numUnits = 4;
    unsigned int outputSize = 4;

    layer->m_BasicParameters.m_InputToForgetWeights = std::make_unique<ScopedTensorHandle>
            (TensorInfo({ numUnits, inputSize }, DataType::Float32));
    layer->m_BasicParameters.m_InputToCellWeights = std::make_unique<ScopedTensorHandle>
            (TensorInfo({ numUnits, inputSize }, DataType::Float32));
    layer->m_BasicParameters.m_InputToOutputWeights = std::make_unique<ScopedTensorHandle>
            (TensorInfo({ numUnits, inputSize }, DataType::Float32));
    layer->m_BasicParameters.m_RecurrentToForgetWeights = std::make_unique<ScopedTensorHandle>
            (TensorInfo({ numUnits, outputSize }, DataType::Float32));
    layer->m_BasicParameters.m_RecurrentToCellWeights = std::make_unique<ScopedTensorHandle>
            (TensorInfo({ numUnits, outputSize }, DataType::Float32));
    layer->m_BasicParameters.m_RecurrentToOutputWeights = std::make_unique<ScopedTensorHandle>
            (TensorInfo({ numUnits, outputSize }, DataType::Float32));
    layer->m_BasicParameters.m_ForgetGateBias = std::make_unique<ScopedTensorHandle>
            (TensorInfo({ numUnits }, DataType::Float32));
    layer->m_BasicParameters.m_CellBias = std::make_unique<ScopedTensorHandle>
            (TensorInfo({ numUnits }, DataType::Float32));
    layer->m_BasicParameters.m_OutputGateBias = std::make_unique<ScopedTensorHandle>
            (TensorInfo({ numUnits }, DataType::Float32));

    layer->m_BasicParameters.m_InputToForgetWeights->Allocate();
    layer->m_BasicParameters.m_InputToCellWeights->Allocate();
    layer->m_BasicParameters.m_InputToOutputWeights->Allocate();
    layer->m_BasicParameters.m_RecurrentToForgetWeights->Allocate();
    layer->m_BasicParameters.m_RecurrentToCellWeights->Allocate();
    layer->m_BasicParameters.m_RecurrentToOutputWeights->Allocate();
    layer->m_BasicParameters.m_ForgetGateBias->Allocate();
    layer->m_BasicParameters.m_CellBias->Allocate();
    layer->m_BasicParameters.m_OutputGateBias->Allocate();

    if (layerDesc.m_PeepholeEnabled)
    {
        layer->m_PeepholeParameters.m_CellToForgetWeights = std::make_unique<ScopedTensorHandle>
                (TensorInfo({ numUnits }, DataType::Float32));
        layer->m_PeepholeParameters.m_CellToOutputWeights = std::make_unique<ScopedTensorHandle>
                (TensorInfo({ numUnits }, DataType::Float32));
        layer->m_PeepholeParameters.m_CellToForgetWeights->Allocate();
        layer->m_PeepholeParameters.m_CellToOutputWeights->Allocate();
    }

    // Creates input and output layers.
    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
    Layer* const outputStateIn = graph.AddLayer<InputLayer>(1, "outputStateIn");
    Layer* const cellStateIn = graph.AddLayer<InputLayer>(2, "cellStateIn");
    Layer* const scratchBuffer = graph.AddLayer<OutputLayer>(0, "scratchBuffer");
    Layer* const outputStateOut = graph.AddLayer<OutputLayer>(1, "outputStateOut");
    Layer* const cellStateOut = graph.AddLayer<OutputLayer>(2, "cellStateOut");
    Layer* const output = graph.AddLayer<OutputLayer>(3, "output");

    // Connects up.
    armnn::TensorInfo lstmTensorInfo1({ batchSize, inputSize }, DataType::Float32);
    armnn::TensorInfo lstmTensorInfo2({ batchSize, numUnits }, DataType::Float32);
    armnn::TensorInfo lstmTensorInfo3({ batchSize, outputSize }, DataType::Float32);
    armnn::TensorInfo lstmTensorInfoScratchBuff({ batchSize, numUnits * (layerDesc.m_CifgEnabled ? 3 : 4) },
                                                DataType::Float32);
    Connect(input, layer, lstmTensorInfo1, 0, 0);
    Connect(cellStateIn, layer, lstmTensorInfo2, 0, 1);
    Connect(outputStateIn, layer, lstmTensorInfo3, 0, 2);
    Connect(layer, scratchBuffer, lstmTensorInfoScratchBuff, 0, 0);
    Connect(layer, outputStateOut, lstmTensorInfo3, 1, 0);
    Connect(layer, cellStateOut, lstmTensorInfo2, 2, 0);
    Connect(layer, output, lstmTensorInfo3, 3, 0);

    CreateTensorHandles(graph, factory);

    // Makes the workload and checks it.
    auto workload = MakeAndCheckWorkload<LstmWorkload>(*layer, factory);
    LstmQueueDescriptor queueDescriptor = workload->GetData();
    CHECK(queueDescriptor.m_Parameters.m_ActivationFunc == 4);
    CHECK(queueDescriptor.m_Parameters.m_ClippingThresCell == 0.0f);
    CHECK(queueDescriptor.m_Parameters.m_ClippingThresProj == 0.0f);
    CHECK(queueDescriptor.m_Inputs.size() == 3);
    CHECK(queueDescriptor.m_Outputs.size() == 4);

    CHECK((queueDescriptor.m_InputToForgetWeights->GetTensorInfo() == TensorInfo({ numUnits, inputSize },
                                                                                 DataType::Float32)));
    CHECK((queueDescriptor.m_OutputGateBias->GetTensorInfo() == TensorInfo({ numUnits },
                                                                           DataType::Float32)));
    CHECK((queueDescriptor.m_CellBias->GetTensorInfo() == TensorInfo({ numUnits }, DataType::Float32)));
    return workload;
}
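
// Example invocation from a backend test (a sketch; RefLstmWorkload is a
// reference-backend workload type assumed here):
//
//     auto workload = CreateLstmWorkloadTest<RefLstmWorkload>(factory, graph);
//     // Backend-specific checks can then target the CIFG + peephole,
//     // no-projection configuration set up above.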

template <typename QuantizedLstmWorkload>
std::unique_ptr<QuantizedLstmWorkload> CreateQuantizedLstmWorkloadTest(armnn::IWorkloadFactory& factory,
                                                                       armnn::Graph& graph)
{
    auto layer = graph.AddLayer<QuantizedLstmLayer>("quantizedLstmlayer");
    unsigned int numBatches = 2;
    unsigned int inputSize = 2;
    unsigned int outputSize = 4;

    // Scale/Offset for input/output, cellState In/Out, weights, bias
    float inputOutputScale = 0.0078125f;
    int32_t inputOutputOffset = 128;

    float cellStateScale = 0.00048828125f;
    int32_t cellStateOffset = 0;

    float weightsScale = 0.00408021f;
    int32_t weightsOffset = 100;

    float biasScale = 3.1876640625e-05f;
    int32_t biasOffset = 0;

    // Weights and bias tensor and quantization info
    armnn::TensorInfo inputWeightsInfo({outputSize, inputSize},
                                       armnn::DataType::QAsymmU8,
                                       weightsScale,
                                       weightsOffset);

    armnn::TensorInfo recurrentWeightsInfo({outputSize, outputSize},
                                           armnn::DataType::QAsymmU8,
                                           weightsScale,
                                           weightsOffset);

    armnn::TensorInfo biasInfo({outputSize},
                               armnn::DataType::Signed32,
                               biasScale,
                               biasOffset);

    // Weights and bias
    layer->m_QuantizedLstmParameters.m_InputToInputWeights =
            std::make_unique<ScopedTensorHandle>(inputWeightsInfo);
    layer->m_QuantizedLstmParameters.m_InputToForgetWeights =
            std::make_unique<ScopedTensorHandle>(inputWeightsInfo);
    layer->m_QuantizedLstmParameters.m_InputToCellWeights =
            std::make_unique<ScopedTensorHandle>(inputWeightsInfo);
    layer->m_QuantizedLstmParameters.m_InputToOutputWeights =
            std::make_unique<ScopedTensorHandle>(inputWeightsInfo);

    layer->m_QuantizedLstmParameters.m_RecurrentToInputWeights =
            std::make_unique<ScopedTensorHandle>(recurrentWeightsInfo);
    layer->m_QuantizedLstmParameters.m_RecurrentToForgetWeights =
            std::make_unique<ScopedTensorHandle>(recurrentWeightsInfo);
    layer->m_QuantizedLstmParameters.m_RecurrentToCellWeights =
            std::make_unique<ScopedTensorHandle>(recurrentWeightsInfo);
    layer->m_QuantizedLstmParameters.m_RecurrentToOutputWeights =
            std::make_unique<ScopedTensorHandle>(recurrentWeightsInfo);

    layer->m_QuantizedLstmParameters.m_InputGateBias = std::make_unique<ScopedTensorHandle>(biasInfo);
    layer->m_QuantizedLstmParameters.m_ForgetGateBias = std::make_unique<ScopedTensorHandle>(biasInfo);
    layer->m_QuantizedLstmParameters.m_CellBias = std::make_unique<ScopedTensorHandle>(biasInfo);
    layer->m_QuantizedLstmParameters.m_OutputGateBias = std::make_unique<ScopedTensorHandle>(biasInfo);

    // Allocate weights and bias
    layer->m_QuantizedLstmParameters.m_InputToInputWeights->Allocate();
    layer->m_QuantizedLstmParameters.m_InputToForgetWeights->Allocate();
    layer->m_QuantizedLstmParameters.m_InputToCellWeights->Allocate();
    layer->m_QuantizedLstmParameters.m_InputToOutputWeights->Allocate();

    layer->m_QuantizedLstmParameters.m_RecurrentToInputWeights->Allocate();
    layer->m_QuantizedLstmParameters.m_RecurrentToForgetWeights->Allocate();
    layer->m_QuantizedLstmParameters.m_RecurrentToCellWeights->Allocate();
    layer->m_QuantizedLstmParameters.m_RecurrentToOutputWeights->Allocate();

    layer->m_QuantizedLstmParameters.m_InputGateBias->Allocate();
    layer->m_QuantizedLstmParameters.m_ForgetGateBias->Allocate();
    layer->m_QuantizedLstmParameters.m_CellBias->Allocate();
    layer->m_QuantizedLstmParameters.m_OutputGateBias->Allocate();

    // Create input and output layers
    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
    Layer* const cellStateIn = graph.AddLayer<InputLayer>(1, "cellStateIn");
    Layer* const outputStateIn = graph.AddLayer<InputLayer>(2, "outputStateIn");

    Layer* const cellStateOut = graph.AddLayer<OutputLayer>(0, "cellStateOut");
    Layer* const outputStateOut = graph.AddLayer<OutputLayer>(1, "outputStateOut");

    // Input/output tensor info and quantization info
    armnn::TensorInfo inputInfo({numBatches, inputSize},
                                armnn::DataType::QAsymmU8,
                                inputOutputScale,
                                inputOutputOffset);

    armnn::TensorInfo cellStateInfo({numBatches, outputSize},
                                    armnn::DataType::QSymmS16,
                                    cellStateScale,
                                    cellStateOffset);

    armnn::TensorInfo outputStateInfo({numBatches, outputSize},
                                      armnn::DataType::QAsymmU8,
                                      inputOutputScale,
                                      inputOutputOffset);

    // Connect input/output slots
    Connect(input, layer, inputInfo, 0, 0);
    Connect(cellStateIn, layer, cellStateInfo, 0, 1);
    Connect(outputStateIn, layer, outputStateInfo, 0, 2);

    Connect(layer, cellStateOut, cellStateInfo, 0, 0);
    Connect(layer, outputStateOut, outputStateInfo, 1, 0);

    CreateTensorHandles(graph, factory);

    // Create workload and check layer support
    auto workload = MakeAndCheckWorkload<QuantizedLstmWorkload>(*layer, factory);
    QuantizedLstmQueueDescriptor queueDescriptor = workload->GetData();

    // Validate input/output sizes
    CHECK(queueDescriptor.m_Inputs.size() == 3);
    CHECK(queueDescriptor.m_Outputs.size() == 2);

    // Validate weight tensor info
    CHECK((queueDescriptor.m_InputToInputWeights->GetTensorInfo() == inputWeightsInfo));
    CHECK((queueDescriptor.m_InputToForgetWeights->GetTensorInfo() == inputWeightsInfo));
    CHECK((queueDescriptor.m_InputToCellWeights->GetTensorInfo() == inputWeightsInfo));
    CHECK((queueDescriptor.m_InputToOutputWeights->GetTensorInfo() == inputWeightsInfo));

    CHECK((queueDescriptor.m_RecurrentToInputWeights->GetTensorInfo() == recurrentWeightsInfo));
    CHECK((queueDescriptor.m_RecurrentToForgetWeights->GetTensorInfo() == recurrentWeightsInfo));
    CHECK((queueDescriptor.m_RecurrentToCellWeights->GetTensorInfo() == recurrentWeightsInfo));
    CHECK((queueDescriptor.m_RecurrentToOutputWeights->GetTensorInfo() == recurrentWeightsInfo));

    CHECK((queueDescriptor.m_InputGateBias->GetTensorInfo() == biasInfo));
    CHECK((queueDescriptor.m_ForgetGateBias->GetTensorInfo() == biasInfo));
    CHECK((queueDescriptor.m_CellBias->GetTensorInfo() == biasInfo));
    CHECK((queueDescriptor.m_OutputGateBias->GetTensorInfo() == biasInfo));

    return workload;
}

template <typename QLstmWorkload>
std::unique_ptr<QLstmWorkload> CreateQLstmWorkloadTest(armnn::IWorkloadFactory& factory,
                                                       armnn::Graph& graph)
{
    QLstmDescriptor layerDesc;
    layerDesc.m_CifgEnabled       = true;
    layerDesc.m_PeepholeEnabled   = false;
    layerDesc.m_ProjectionEnabled = false;
    layerDesc.m_LayerNormEnabled  = true;

    layerDesc.m_CellClip       = 0.0f;
    layerDesc.m_ProjectionClip = 0.0f;

    layerDesc.m_HiddenStateZeroPoint = 0;
    layerDesc.m_HiddenStateScale     = 0.007f;

    layerDesc.m_InputIntermediateScale  = 0.007059f;
    layerDesc.m_ForgetIntermediateScale = 0.007812f;
    layerDesc.m_CellIntermediateScale   = 0.007059f;
    layerDesc.m_OutputIntermediateScale = 0.007812f;

    QLstmLayer* const layer = graph.AddLayer<QLstmLayer>(layerDesc, "qLstm");

    unsigned int numBatches = 2;
    unsigned int inputSize  = 4;
    unsigned int numUnits   = 4;
    unsigned int outputSize = 4;

    // Scale/Offset quantization info
    float inputScale    = 0.0078125f;
    int32_t inputOffset = 0;

    // if (!projectionEnabled) outputScale == hiddenStateScale
    float outputScale    = layerDesc.m_HiddenStateScale;
    int32_t outputOffset = layerDesc.m_HiddenStateZeroPoint;

    float cellStateScale    = 3.05176e-05f;
    int32_t cellStateOffset = 0;

    float weightsScale    = 0.00784314f;
    int32_t weightsOffset = 0;

    float layerNormScale    = 3.05182e-05f;
    int32_t layerNormOffset = 0;

    float biasScale    = layerNormScale / 1024;
    int32_t biasOffset = 0;

    // Weights and bias tensor and quantization info
    armnn::TensorInfo inputWeightsInfo({outputSize, inputSize},
                                       armnn::DataType::QSymmS8,
                                       weightsScale,
                                       weightsOffset);

    armnn::TensorInfo recurrentWeightsInfo({outputSize, outputSize},
                                           armnn::DataType::QSymmS8,
                                           weightsScale,
                                           weightsOffset);

    armnn::TensorInfo biasInfo({outputSize}, armnn::DataType::Signed32, biasScale, biasOffset);

    armnn::TensorInfo layerNormWeightsInfo({numUnits}, armnn::DataType::QSymmS16, layerNormScale, layerNormOffset);

    // Create and allocate tensors
    layer->m_BasicParameters.m_InputToForgetWeights = std::make_unique<ScopedTensorHandle>(inputWeightsInfo);
    layer->m_BasicParameters.m_InputToCellWeights = std::make_unique<ScopedTensorHandle>(inputWeightsInfo);
    layer->m_BasicParameters.m_InputToOutputWeights = std::make_unique<ScopedTensorHandle>(inputWeightsInfo);

    layer->m_BasicParameters.m_RecurrentToForgetWeights =
            std::make_unique<ScopedTensorHandle>(recurrentWeightsInfo);
    layer->m_BasicParameters.m_RecurrentToCellWeights =
            std::make_unique<ScopedTensorHandle>(recurrentWeightsInfo);
    layer->m_BasicParameters.m_RecurrentToOutputWeights =
            std::make_unique<ScopedTensorHandle>(recurrentWeightsInfo);

    layer->m_BasicParameters.m_ForgetGateBias = std::make_unique<ScopedTensorHandle>(biasInfo);
    layer->m_BasicParameters.m_CellBias = std::make_unique<ScopedTensorHandle>(biasInfo);
    layer->m_BasicParameters.m_OutputGateBias = std::make_unique<ScopedTensorHandle>(biasInfo);

    layer->m_LayerNormParameters.m_ForgetLayerNormWeights =
            std::make_unique<ScopedTensorHandle>(layerNormWeightsInfo);
    layer->m_LayerNormParameters.m_CellLayerNormWeights =
            std::make_unique<ScopedTensorHandle>(layerNormWeightsInfo);
    layer->m_LayerNormParameters.m_OutputLayerNormWeights =
            std::make_unique<ScopedTensorHandle>(layerNormWeightsInfo);

    layer->m_BasicParameters.m_InputToForgetWeights->Allocate();
    layer->m_BasicParameters.m_InputToCellWeights->Allocate();
    layer->m_BasicParameters.m_InputToOutputWeights->Allocate();

    layer->m_BasicParameters.m_RecurrentToForgetWeights->Allocate();
    layer->m_BasicParameters.m_RecurrentToCellWeights->Allocate();
    layer->m_BasicParameters.m_RecurrentToOutputWeights->Allocate();

    layer->m_BasicParameters.m_ForgetGateBias->Allocate();
    layer->m_BasicParameters.m_CellBias->Allocate();
    layer->m_BasicParameters.m_OutputGateBias->Allocate();

    layer->m_LayerNormParameters.m_ForgetLayerNormWeights->Allocate();
    layer->m_LayerNormParameters.m_CellLayerNormWeights->Allocate();
    layer->m_LayerNormParameters.m_OutputLayerNormWeights->Allocate();

    // Input and output layers
    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
    Layer* const outputStateIn = graph.AddLayer<InputLayer>(1, "outputStateIn");
    Layer* const cellStateIn = graph.AddLayer<InputLayer>(2, "cellStateIn");

    Layer* const outputStateOut = graph.AddLayer<OutputLayer>(0, "outputStateOut");
    Layer* const cellStateOut = graph.AddLayer<OutputLayer>(1, "cellStateOut");
    Layer* const output = graph.AddLayer<OutputLayer>(2, "output");

    // Input/Output tensor info
    armnn::TensorInfo inputInfo({numBatches, inputSize},
                                armnn::DataType::QAsymmS8,
                                inputScale,
                                inputOffset);

    armnn::TensorInfo cellStateInfo({numBatches, numUnits},
                                    armnn::DataType::QSymmS16,
                                    cellStateScale,
                                    cellStateOffset);

    armnn::TensorInfo outputStateInfo({numBatches, outputSize},
                                      armnn::DataType::QAsymmS8,
                                      outputScale,
                                      outputOffset);

    // Connect layers to slots
    Connect(input, layer, inputInfo, 0, 0);
    Connect(outputStateIn, layer, outputStateInfo, 0, 1);
    Connect(cellStateIn, layer, cellStateInfo, 0, 2);

    Connect(layer, outputStateOut, outputStateInfo, 0, 0);
    Connect(layer, cellStateOut, cellStateInfo, 1, 0);
    Connect(layer, output, outputStateInfo, 2, 0);

    CreateTensorHandles(graph, factory);

    // Create and check workload
    auto workload = MakeAndCheckWorkload<QLstmWorkload>(*layer, factory);
    QLstmQueueDescriptor queueDescriptor = workload->GetData();
    CHECK(queueDescriptor.m_Parameters.m_CellClip == 0.0f);
    CHECK(queueDescriptor.m_Parameters.m_ProjectionClip == 0.0f);
    CHECK(queueDescriptor.m_Inputs.size() == 3);
    CHECK(queueDescriptor.m_Outputs.size() == 3);

    CHECK((queueDescriptor.m_InputToForgetWeights->GetTensorInfo() == inputWeightsInfo));
    CHECK((queueDescriptor.m_InputToCellWeights->GetTensorInfo() == inputWeightsInfo));
    CHECK((queueDescriptor.m_InputToOutputWeights->GetTensorInfo() == inputWeightsInfo));

    CHECK((queueDescriptor.m_RecurrentToForgetWeights->GetTensorInfo() == recurrentWeightsInfo));
    CHECK((queueDescriptor.m_RecurrentToCellWeights->GetTensorInfo() == recurrentWeightsInfo));
    CHECK((queueDescriptor.m_RecurrentToOutputWeights->GetTensorInfo() == recurrentWeightsInfo));

    CHECK((queueDescriptor.m_ForgetGateBias->GetTensorInfo() == biasInfo));
    CHECK((queueDescriptor.m_CellBias->GetTensorInfo() == biasInfo));
    CHECK((queueDescriptor.m_OutputGateBias->GetTensorInfo() == biasInfo));

    return workload;
}
1127 
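// Note on the tensor infos used above: QLstm keeps its activations (input and
// output state) in QAsymmS8 while the cell state is held in the higher-precision
// QSymmS16, which is the quantization pairing this quantized LSTM layer expects.
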
1128 template<typename Convolution2dWorkload, armnn::DataType DataType>
1129 std::unique_ptr<Convolution2dWorkload> CreateDirectConvolution2dWorkloadTest(armnn::IWorkloadFactory& factory,
1130                                                                              armnn::Graph& graph)
1131 {
1132     // Creates the layer we're testing.
1133     Convolution2dDescriptor layerDesc;
1134     layerDesc.m_PadLeft = 1;
1135     layerDesc.m_PadRight = 1;
1136     layerDesc.m_PadTop = 1;
1137     layerDesc.m_PadBottom = 1;
1138     layerDesc.m_StrideX = 1;
1139     layerDesc.m_StrideY = 1;
1140     layerDesc.m_BiasEnabled = true;
1141 
1142     Convolution2dLayer* const layer = graph.AddLayer<Convolution2dLayer>(layerDesc, "layer");
1143 
1144     float inputsQScale = 1.0f;
1145     float outputQScale = DataType == armnn::DataType::QAsymmU8 ? 2.0f : 1.0f;
1146 
1147     TensorShape biasShape = TensorShape{ 2 };
1148     TensorShape weightShape = TensorShape{ 2, 3, 3, 3 };
1149     armnn::TensorInfo weightsTensorInfo(weightShape, DataType, inputsQScale);
1150     weightsTensorInfo.SetConstant();
1151     armnn::TensorInfo biasTensorInfo(biasShape, GetBiasDataType(DataType), inputsQScale);
1152     biasTensorInfo.SetConstant();
1153 
1154     // Creates extra layers.
1155     Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1156     auto const weights = graph.AddLayer<ConstantLayer>("weights");
1157     auto const bias = graph.AddLayer<ConstantLayer>("bias");
1158     Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
1159 
1160     weights->m_LayerOutput = std::make_unique<ScopedTensorHandle>(weightsTensorInfo);
1161     weights->m_LayerOutput->Allocate();
1162     bias->m_LayerOutput = std::make_unique<ScopedTensorHandle>(biasTensorInfo);
1163     bias->m_LayerOutput->Allocate();
1164 
1165     // Connects up.
1166     Connect(input, layer, TensorInfo({2, 3, 6, 6}, DataType, inputsQScale));
1167     Connect(weights, layer, weightsTensorInfo, 0, 1);
1168     Connect(bias, layer, biasTensorInfo, 0, 2);
1169     Connect(layer, output, TensorInfo({2, 2, 6, 6}, DataType, outputQScale));
1170     CreateTensorHandles(graph, factory);
1171 
1172     // Makes the workload and checks it.
1173     auto workload = MakeAndCheckWorkload<Convolution2dWorkload>(*layer, factory);
1174 
1175     Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
1176     CHECK(queueDescriptor.m_Parameters.m_StrideX == 1);
1177     CHECK(queueDescriptor.m_Parameters.m_StrideY == 1);
1178     CHECK(queueDescriptor.m_Parameters.m_PadLeft == 1);
1179     CHECK(queueDescriptor.m_Parameters.m_PadRight == 1);
1180     CHECK(queueDescriptor.m_Parameters.m_PadTop == 1);
1181     CHECK(queueDescriptor.m_Parameters.m_PadBottom == 1);
1182     CHECK(queueDescriptor.m_Parameters.m_BiasEnabled == true);
1183 
1184     CHECK(queueDescriptor.m_Inputs.size() == 3);
1185     CHECK(queueDescriptor.m_Outputs.size() == 1);
1186 
1187     // Returns so we can do extra, backend-specific tests.
1188     return workload;
1189 }
1190 
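// A minimal sketch of how a backend test typically drives this helper. The factory
// and workload types below are the reference backend's, used purely as an example;
// each backend test substitutes its own:
//
//     armnn::Graph graph;
//     RefWorkloadFactory factory;
//     auto workload = CreateDirectConvolution2dWorkloadTest<RefConvolution2dWorkload,
//                                                           armnn::DataType::Float32>(factory, graph);
//     // ...additional backend-specific checks on workload->GetData()...
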
1191 template <typename DepthwiseConvolution2dFloat32Workload, armnn::DataType DataType>
1192 std::unique_ptr<DepthwiseConvolution2dFloat32Workload> CreateDepthwiseConvolution2dWorkloadTest(
1193     armnn::IWorkloadFactory& factory, armnn::Graph& graph, DataLayout dataLayout = DataLayout::NCHW)
1194 {
1195     // Creates the layer we're testing.
1196     DepthwiseConvolution2dDescriptor layerDesc;
1197     layerDesc.m_PadLeft     = 1;
1198     layerDesc.m_PadRight    = 2;
1199     layerDesc.m_PadTop      = 1;
1200     layerDesc.m_PadBottom   = 2;
1201     layerDesc.m_StrideX     = 1;
1202     layerDesc.m_StrideY     = 1;
1203     layerDesc.m_BiasEnabled = false;
1204     layerDesc.m_DataLayout  = dataLayout;
1205 
1206     float inputsQScale = 1.0f;
1207     float outputQScale = DataType == armnn::DataType::QAsymmU8 ? 2.0f : 1.0f;
1208 
1209     TensorShape weightShape({1, 4, 4, 2});
1210     TensorShape inputShape = (dataLayout == DataLayout::NCHW) ?
1211                              TensorShape{ 2, 2, 5, 5 } : TensorShape{ 2, 5, 5, 2 };
1212     TensorShape outputShape = (dataLayout == DataLayout::NCHW) ?
1213                               TensorShape{ 2, 2, 5, 5 } : TensorShape{ 2, 5, 5, 2 };
1214 
1215     DepthwiseConvolution2dLayer* const layer = graph.AddLayer<DepthwiseConvolution2dLayer>(layerDesc, "layer");
1216 
1217 
1218     // Creates extra layers.
1219     Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1220     Layer* const weights = graph.AddLayer<ConstantLayer>("weights");
1221     Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
1222 
1223     // Connects up.
1224     Connect(input, layer, TensorInfo(inputShape, DataType, inputsQScale));
1225     Connect(weights, layer, TensorInfo(weightShape, DataType, inputsQScale, 0.0f, true), 0, 1);
1226     Connect(layer, output, TensorInfo(outputShape, DataType, outputQScale));
1227     CreateTensorHandles(graph, factory);
1228 
1229     // Makes the workload and checks it.
1230     auto workload = MakeAndCheckWorkload<DepthwiseConvolution2dFloat32Workload>(*layer, factory);
1231 
1232     DepthwiseConvolution2dQueueDescriptor queueDescriptor = workload->GetData();
1233     CHECK(queueDescriptor.m_Parameters.m_StrideX == 1);
1234     CHECK(queueDescriptor.m_Parameters.m_StrideY == 1);
1235     CHECK(queueDescriptor.m_Parameters.m_PadLeft == 1);
1236     CHECK(queueDescriptor.m_Parameters.m_PadRight == 2);
1237     CHECK(queueDescriptor.m_Parameters.m_PadTop == 1);
1238     CHECK(queueDescriptor.m_Parameters.m_PadBottom == 2);
1239     CHECK(queueDescriptor.m_Parameters.m_BiasEnabled == false);
1240     CHECK((queueDescriptor.m_Parameters.m_DataLayout == dataLayout));
1241 
1242     CHECK(queueDescriptor.m_Inputs.size() == 2);
1243     CHECK(queueDescriptor.m_Outputs.size() == 1);
1244 
1245     // Returns so we can do extra, backend-specific tests.
1246     return workload;
1247 }
1248 
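// Shape sanity check for the test above: with a 4x4 kernel, pads of 1 (before) and
// 2 (after) and stride 1, each spatial dimension maps 5 -> (5 + 1 + 2 - 4) / 1 + 1 = 5,
// so input and output share their spatial extents. The {1, 4, 4, 2} weight shape
// follows the [1, H, W, I*M] layout Arm NN uses for depthwise weights.
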
1249 template <typename FullyConnectedWorkload, armnn::DataType DataType>
1250 std::unique_ptr<FullyConnectedWorkload> CreateFullyConnectedWorkloadTest(armnn::IWorkloadFactory& factory,
1251                                                                          armnn::Graph&            graph)
1252 {
1253     // Creates the layer we're testing.
1254     FullyConnectedDescriptor layerDesc;
1255     layerDesc.m_BiasEnabled = false;
1256     layerDesc.m_TransposeWeightMatrix = true;
1257 
1258     FullyConnectedLayer* const layer = graph.AddLayer<FullyConnectedLayer>(layerDesc, "layer");
1259 
1260     float inputsQScale = 1.0f;
1261     float outputQScale = DataType == armnn::DataType::QAsymmU8 ? 2.0f : 1.0f;
1262 
1263     armnn::TensorInfo weightsTensorInfo({7, 20}, DataType, inputsQScale);
1264     weightsTensorInfo.SetConstant();
1265 
1266     // Creates extra layers.
1267     Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1268     auto const weights = graph.AddLayer<ConstantLayer>("weights");
1269     Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
1270 
1271     weights->m_LayerOutput = std::make_unique<ScopedTensorHandle>(weightsTensorInfo);
1272     weights->m_LayerOutput->Allocate();
1273 
1274     // Connects up.
1275     Connect(input, layer, TensorInfo({3, 1, 4, 5}, DataType, inputsQScale), 0, 0);
1276     Connect(weights, layer, weightsTensorInfo, 0, 1);
1277     Connect(layer, output, TensorInfo({3, 7}, DataType, outputQScale));
1278     CreateTensorHandles(graph, factory);
1279 
1280     // Makes the workload and checks it.
1281     auto workload = MakeAndCheckWorkload<FullyConnectedWorkload>(*layer, factory);
1282 
1283     FullyConnectedQueueDescriptor queueDescriptor = workload->GetData();
1284     CHECK(queueDescriptor.m_Parameters.m_TransposeWeightMatrix == true);
1285 
1286     CHECK(queueDescriptor.m_Inputs.size() == 2);
1287     CHECK(queueDescriptor.m_Outputs.size() == 1);
1288 
1289     // Returns so we can do extra, backend-specific tests.
1290     return workload;
1291 }
1292 
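// Worked shapes for the test above: the {3, 1, 4, 5} input is treated as a
// [3, 20] matrix, and with m_TransposeWeightMatrix set the {7, 20} weights act as
// a 20x7 matrix, giving the {3, 7} output.
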
1293 template <typename FullyConnectedWorkload, armnn::DataType DataType>
1294 std::unique_ptr<FullyConnectedWorkload> CreateFullyConnectedWithBlobWorkloadTest
1295     (armnn::IWorkloadFactory& factory,
1296      armnn::Graph& graph)
1297 {
1298     // Creates the layer we're testing.
1299     FullyConnectedDescriptor layerDesc;
1300     layerDesc.m_BiasEnabled = true;
1301     layerDesc.m_TransposeWeightMatrix = true;
1302 
1303     FullyConnectedLayer* const layer = graph.AddLayer<FullyConnectedLayer>(layerDesc, "layer");
1304 
1305     float inputsQScale = 1.0f;
1306     float outputQScale = DataType == armnn::DataType::QAsymmU8 ? 2.0f : 1.0f;
1307 
1308     armnn::TensorInfo weightsTensorInfo({7, 20}, DataType, inputsQScale);
1309     armnn::TensorInfo biasesTensorInfo({7}, GetBiasDataType(DataType), inputsQScale);
1310     weightsTensorInfo.SetConstant();
1311     biasesTensorInfo.SetConstant();
1312 
1313     auto activationDesc = std::make_shared<ActivationDescriptor>();
1314     activationDesc->m_A        = 10.0f;
1315     activationDesc->m_B        = 5.0f;
1316     activationDesc->m_Function = armnn::ActivationFunction::BoundedReLu;
1317 
1318     layer->SetAdditionalInfoForObject(activationDesc);
1319 
1320     // Check that the additional information can be queried from the layer
1321     std::shared_ptr<ActivationDescriptor> activationDescPtr = layer->GetAdditionalInformation<ActivationDescriptor>();
1322     ARMNN_ASSERT(static_cast<float>(activationDescPtr->m_A) == 10.0f);
1323     ARMNN_ASSERT(static_cast<float>(activationDescPtr->m_B) == 5.0f);
1324     ARMNN_ASSERT(static_cast<ActivationFunction>(activationDescPtr->m_Function) ==
1325         armnn::ActivationFunction::BoundedReLu);
1326 
1327     // Creates extra layers.
1328     Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1329     auto const weights = graph.AddLayer<ConstantLayer>("weights");
1330     auto const biases = graph.AddLayer<ConstantLayer>("biases");
1331     Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
1332 
1333     weights->m_LayerOutput = std::make_unique<ScopedTensorHandle>(weightsTensorInfo);
1334     weights->m_LayerOutput->Allocate();
1335     biases->m_LayerOutput = std::make_unique<ScopedTensorHandle>(biasesTensorInfo);
1336     biases->m_LayerOutput->Allocate();
1337 
1338     // Connects up.
1339     Connect(input, layer, TensorInfo({3, 1, 4, 5}, DataType, inputsQScale), 0, 0);
1340     Connect(weights, layer, weightsTensorInfo, 0, 1);
1341     Connect(biases, layer, biasesTensorInfo, 0, 2);
1342     Connect(layer, output, TensorInfo({3, 7}, DataType, outputQScale));
1343     CreateTensorHandles(graph, factory);
1344 
1345     // Makes the workload and checks it.
1346     auto workload = MakeAndCheckWorkload<FullyConnectedWorkload>(*layer, factory);
1347 
1348     FullyConnectedQueueDescriptor queueDescriptor = workload->GetData();
1349 
1350     const ActivationDescriptor* queueDescBlobPtr = queueDescriptor.GetAdditionalInformation<ActivationDescriptor>();
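    // IgnoreUnused silences unused-variable warnings in builds where the
    // ARMNN_ASSERTs below are compiled out.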
1351     IgnoreUnused(queueDescBlobPtr);
1352 
1353     ARMNN_ASSERT(static_cast<float>(queueDescBlobPtr->m_A) == 10.0f);
1354     ARMNN_ASSERT(static_cast<float>(queueDescBlobPtr->m_B) == 5.0f);
1355     ARMNN_ASSERT(
1356         static_cast<ActivationFunction>(queueDescBlobPtr->m_Function) == armnn::ActivationFunction::BoundedReLu
1357     );
1358 
1359     CHECK(queueDescriptor.m_Parameters.m_BiasEnabled == true);
1360     CHECK(queueDescriptor.m_Parameters.m_TransposeWeightMatrix == true);
1361     CHECK(queueDescriptor.m_Inputs.size() == 3);
1362     CHECK(queueDescriptor.m_Outputs.size() == 1);
1363 
1364     // Returns so we can do extra, backend-specific tests.
1365     return workload;
1366 }
1367 
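// The additional-information blob exercised above is how fused activations are
// communicated: a backend that supports fusing can read the attached
// ActivationDescriptor off the queue descriptor and fold the BoundedReLu into its
// FullyConnected kernel.
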
1368 template <typename FullyConnectedWorkload, armnn::DataType DataType>
1369 std::unique_ptr<FullyConnectedWorkload> CreateFullyConnectedWorkloadWeightsBiasesAsInputsTest
1370     (armnn::IWorkloadFactory& factory,
1371      armnn::Graph&            graph)
1372 {
1373     // Creates the layer we're testing.
1374     FullyConnectedDescriptor layerDesc;
1375     layerDesc.m_BiasEnabled = true;
1376     layerDesc.m_TransposeWeightMatrix = true;
1377     layerDesc.m_ConstantWeights = false;
1378 
1379     FullyConnectedLayer* const layer = graph.AddLayer<FullyConnectedLayer>(layerDesc, "layer");
1380 
1381     float inputsQScale = 1.0f;
1382     float outputQScale = DataType == armnn::DataType::QAsymmU8 ? 2.0f : 1.0f;
1383 
1384     // Creates extra layers with weights and biases as input layers.
1385     Layer* const input   = graph.AddLayer<InputLayer>(1, "input");
1386     Layer* const weights = graph.AddLayer<InputLayer>(2, "weights");
1387     Layer* const biases  = graph.AddLayer<InputLayer>(3, "biases");
1388     Layer* const output  = graph.AddLayer<OutputLayer>(0, "output");
1389 
1390     // Connects up.
1391     Connect(input, layer, TensorInfo({3, 1, 4, 5}, DataType, inputsQScale), 0, 0);
1392     Connect(weights, layer, TensorInfo({7, 20}, DataType, inputsQScale), 0, 1);
1393     Connect(biases, layer, TensorInfo({7}, GetBiasDataType(DataType), inputsQScale), 0, 2);
1394     Connect(layer, output, TensorInfo({3, 7}, DataType, outputQScale));
1395     CreateTensorHandles(graph, factory);
1396 
1397     // Makes the workload and checks it.
1398     auto workload = MakeAndCheckWorkload<FullyConnectedWorkload>(*layer, factory);
1399 
1400     FullyConnectedQueueDescriptor queueDescriptor = workload->GetData();
1401 
1402     CHECK(queueDescriptor.m_Parameters.m_BiasEnabled == true);
1403     CHECK(queueDescriptor.m_Parameters.m_TransposeWeightMatrix == true);
1404     CHECK(queueDescriptor.m_Parameters.m_ConstantWeights == false);
1405     CHECK(queueDescriptor.m_Inputs.size() == 3);
1406     CHECK(queueDescriptor.m_Outputs.size() == 1);
1407 
1408     // Returns so we can do extra, backend-specific tests.
1409     return workload;
1410 }
1411 
1412 
1413 template <typename NormalizationWorkload, armnn::DataType DataType>
1414 std::unique_ptr<NormalizationWorkload> CreateNormalizationWorkloadTest(armnn::IWorkloadFactory& factory,
1415                                                                        armnn::Graph& graph,
1416                                                                        DataLayout dataLayout = DataLayout::NCHW)
1417 {
1418     // Creates the layer we're testing.
1419     NormalizationDescriptor layerDesc;
1420     layerDesc.m_NormChannelType = NormalizationAlgorithmChannel::Across;
1421     layerDesc.m_NormMethodType = NormalizationAlgorithmMethod::LocalBrightness;
1422     layerDesc.m_NormSize = 3;
1423     layerDesc.m_Alpha = 0.5f;
1424     layerDesc.m_Beta = -1.0f;
1425     layerDesc.m_K = 0.2f;
1426     layerDesc.m_DataLayout = dataLayout;
1427 
1428     NormalizationLayer* layer = graph.AddLayer<NormalizationLayer>(layerDesc, "layer");
1429 
1430     // Creates extra layers.
1431     Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1432     Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
1433 
1434     TensorShape inputShape = (dataLayout == DataLayout::NCHW) ?
1435                 TensorShape{ 3, 5, 5, 1 } : TensorShape{ 3, 1, 5, 5 };
1436     TensorShape outputShape = (dataLayout == DataLayout::NCHW) ?
1437                 TensorShape{ 3, 5, 5, 1 } : TensorShape{ 3, 1, 5, 5 };
1438 
1439     // Connects up.
1440     armnn::TensorInfo inputTensorInfo(inputShape, DataType);
1441     armnn::TensorInfo outputTensorInfo(outputShape, DataType);
1442     Connect(input, layer, inputTensorInfo);
1443     Connect(layer, output, outputTensorInfo);
1444     CreateTensorHandles(graph, factory);
1445 
1446     // Makes the workload and checks it.
1447     auto workload = MakeAndCheckWorkload<NormalizationWorkload>(*layer, factory);
1448 
1449     NormalizationQueueDescriptor queueDescriptor = workload->GetData();
1450     CHECK((queueDescriptor.m_Parameters.m_NormChannelType == NormalizationAlgorithmChannel::Across));
1451     CHECK((queueDescriptor.m_Parameters.m_NormMethodType == NormalizationAlgorithmMethod::LocalBrightness));
1452     CHECK(queueDescriptor.m_Parameters.m_NormSize == 3);
1453     CHECK(queueDescriptor.m_Parameters.m_Alpha == 0.5f);
1454     CHECK(queueDescriptor.m_Parameters.m_Beta == -1.0f);
1455     CHECK(queueDescriptor.m_Parameters.m_K == 0.2f);
1456     CHECK((queueDescriptor.m_Parameters.m_DataLayout == dataLayout));
1457 
1458     CHECK(queueDescriptor.m_Inputs.size() == 1);
1459     CHECK(queueDescriptor.m_Outputs.size() == 1);
1460 
1461     // Returns so we can do extra, backend-specific tests.
1462     return workload;
1463 }
1464 
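// For reference, Across/LocalBrightness normalization is the standard LRN form
// (assuming the usual Caffe-style definition; implementations differ on whether
// alpha is pre-divided by the window size n = m_NormSize):
//
//     out = in / (K + (alpha / n) * sum_over_window(in^2))^beta
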
1465 template <typename Pooling2dWorkload, armnn::DataType DataType>
1466 std::unique_ptr<Pooling2dWorkload> CreatePooling2dWorkloadTest(armnn::IWorkloadFactory& factory,
1467                                                                armnn::Graph&            graph,
1468                                                                DataLayout dataLayout = DataLayout::NCHW)
1469 {
1470     // Creates the layer we're testing.
1471     Pooling2dDescriptor layerDesc;
1472     layerDesc.m_PoolType = PoolingAlgorithm::Average;
1473     layerDesc.m_PoolWidth = 3;
1474     layerDesc.m_PoolHeight = 3;
1475     layerDesc.m_PadLeft = 2;
1476     layerDesc.m_PadRight = 2;
1477     layerDesc.m_PadTop = 1;
1478     layerDesc.m_PadBottom = 1;
1479     layerDesc.m_StrideX = 2;
1480     layerDesc.m_StrideY = 3;
1481     layerDesc.m_OutputShapeRounding = OutputShapeRounding::Floor;
1482     layerDesc.m_DataLayout = dataLayout;
1483 
1484     Pooling2dLayer* const layer = graph.AddLayer<Pooling2dLayer>(layerDesc, "layer");
1485 
1486     // Create extra layers
1487     Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1488     Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
1489 
1490     TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ? TensorShape{3, 2, 5, 5} : TensorShape{3, 5, 5, 2};
1491     TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{3, 2, 2, 4} : TensorShape{3, 2, 4, 2};
1492 
1493     // Connect up
1494     Connect(input, layer, TensorInfo(inputShape, DataType));
1495     Connect(layer, output, TensorInfo(outputShape, DataType));
1496     CreateTensorHandles(graph, factory);
1497 
1498     // Makes the workload and checks it.
1499     auto workload = MakeAndCheckWorkload<Pooling2dWorkload>(*layer, factory);
1500 
1501     Pooling2dQueueDescriptor queueDescriptor = workload->GetData();
1502     CHECK((queueDescriptor.m_Parameters.m_PoolType == PoolingAlgorithm::Average));
1503     CHECK((queueDescriptor.m_Parameters.m_OutputShapeRounding == OutputShapeRounding::Floor));
1504     CHECK(queueDescriptor.m_Parameters.m_PoolWidth == 3);
1505     CHECK(queueDescriptor.m_Parameters.m_PoolHeight == 3);
1506     CHECK(queueDescriptor.m_Parameters.m_StrideX == 2);
1507     CHECK(queueDescriptor.m_Parameters.m_StrideY == 3);
1508     CHECK(queueDescriptor.m_Parameters.m_PadLeft == 2);
1509     CHECK(queueDescriptor.m_Parameters.m_PadRight == 2);
1510     CHECK(queueDescriptor.m_Parameters.m_PadTop == 1);
1511     CHECK(queueDescriptor.m_Parameters.m_PadBottom == 1);
1512     CHECK((queueDescriptor.m_Parameters.m_DataLayout == dataLayout));
1513 
1514     CHECK(queueDescriptor.m_Inputs.size() == 1);
1515     CHECK(queueDescriptor.m_Outputs.size() == 1);
1516 
1517     // Return so we can do extra, backend-specific tests
1518     return workload;
1519 }
1520 
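// Sanity arithmetic for the NCHW shapes above, using
// out = floor((in + padBefore + padAfter - pool) / stride) + 1:
//
//     height: floor((5 + 1 + 1 - 3) / 3) + 1 = 2
//     width:  floor((5 + 2 + 2 - 3) / 2) + 1 = 4    ->  output {3, 2, 2, 4}
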
1521 template <typename SoftmaxWorkload, armnn::DataType DataType>
1522 std::unique_ptr<SoftmaxWorkload> CreateSoftmaxWorkloadTest(armnn::IWorkloadFactory& factory,
1523                                                            armnn::Graph&            graph)
1524 {
1525     // Create the layer we're testing.
1526     SoftmaxDescriptor softmaxDescriptor;
1527     // Set the axis to -1 for CL and Neon until additional axes are supported.
1528     if (factory.GetBackendId() == armnn::Compute::CpuAcc || factory.GetBackendId() == armnn::Compute::GpuAcc)
1529     {
1530         softmaxDescriptor.m_Axis = -1;
1531     }
1532 
1533     Layer* const layer = graph.AddLayer<SoftmaxLayer>(softmaxDescriptor, "layer");
1534     // Create extra layers.
1535     Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1536     Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
1537 
1538     // Connect up
1539     armnn::TensorInfo tensorInfo({4, 1}, DataType);
1540     if (DataType == armnn::DataType::QAsymmU8)
1541     {
1542         tensorInfo.SetQuantizationOffset(0);
1543         tensorInfo.SetQuantizationScale(1.f / 256);
1544     }
1545     else if (DataType == armnn::DataType::QAsymmS8)
1546     {
1547         tensorInfo.SetQuantizationOffset(-128);
1548         tensorInfo.SetQuantizationScale(1.f / 256);
1549     }
1550 
1551     Connect(input, layer, tensorInfo);
1552     Connect(layer, output, tensorInfo);
1553     CreateTensorHandles(graph, factory);
1554 
1555     // Makes the workload and checks it.
1556     auto workload = MakeAndCheckWorkload<SoftmaxWorkload>(*layer, factory);
1557 
1558     SoftmaxQueueDescriptor queueDescriptor = workload->GetData();
1559     CHECK(queueDescriptor.m_Inputs.size() == 1);
1560     CHECK(queueDescriptor.m_Outputs.size() == 1);
1561 
1562     // Return so we can do extra, backend-specific tests.
1563     return workload;
1564 }
1565 
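// The quantization parameters above map the full [0, 1) softmax output range onto
// the integer range: scale 1/256 with offset 0 for QAsymmU8, or offset -128 for
// QAsymmS8.
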
1566 template<typename SplitterWorkload, armnn::DataType DataType>
1567 std::unique_ptr<SplitterWorkload>
1568     CreateSplitterWorkloadTest(armnn::IWorkloadFactory& factory, armnn::Graph& graph)
1569 {
1570     // Create the layer we're testing.
1571     // NOTE: need three dimensions channels, height/y, width/x because the Compute
1572     //       library restricts subtensors to have the same x and y dimensions as
1573     //       their parent tensors, and therefore the origin on the x and y dimension
1574     //       has to be zero for any view. So we need a third dimension to split...
1575     // NOTE: arguments are: number of views, number of dimensions.
1576     ViewsDescriptor layerDesc(3, 3);
1577     // NOTE: arguments are: view, dimension, value.
1578     layerDesc.SetViewOriginCoord(0, 0, 0);
1579     layerDesc.SetViewOriginCoord(1, 0, 1);
1580     layerDesc.SetViewOriginCoord(2, 0, 3);
1581 
1582     Layer* const layer = graph.AddLayer<SplitterLayer>(layerDesc, "layer");
1583 
1584     // Adds extra layers.
1585     Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1586     Layer* const output0 = graph.AddLayer<OutputLayer>(0, "output0");
1587     Layer* const output1 = graph.AddLayer<OutputLayer>(1, "output1");
1588     Layer* const output2 = graph.AddLayer<OutputLayer>(2, "output2");
1589 
1590     // Connects up.
1591     armnn::TensorInfo tensorInfo({5, 7, 7}, DataType);
1592     Connect(input, layer, tensorInfo);
1593 
1594     armnn::TensorInfo output0Info({1, 7, 7}, DataType);
1595     armnn::TensorInfo output1Info({2, 7, 7}, DataType);
1596     armnn::TensorInfo output2Info({2, 7, 7}, DataType);
1597 
1598     Connect(layer, output0, output0Info, 0, 0);
1599     Connect(layer, output1, output1Info, 1, 0);
1600     Connect(layer, output2, output2Info, 2, 0);
1601 
1602     CreateTensorHandles(graph, factory);
1603 
1604     // Makes the workload and checks it.
1605     auto workload = MakeAndCheckWorkload<SplitterWorkload>(*layer, factory);
1606 
1607     SplitterQueueDescriptor queueDescriptor = workload->GetData();
1608     CHECK(queueDescriptor.m_Inputs.size() == 1);
1609     CHECK(queueDescriptor.m_Outputs.size() == 3);
1610     CHECK(queueDescriptor.m_ViewOrigins.size() == 3);
1611 
1612     CHECK(queueDescriptor.m_ViewOrigins[0].m_Origin[0] == 0);
1613     CHECK(queueDescriptor.m_ViewOrigins[1].m_Origin[0] == 1);
1614     CHECK(queueDescriptor.m_ViewOrigins[2].m_Origin[0] == 3);
1615     CHECK(queueDescriptor.m_ViewOrigins[0].m_Origin[1] == 0);
1616     CHECK(queueDescriptor.m_ViewOrigins[1].m_Origin[1] == 0);
1617     CHECK(queueDescriptor.m_ViewOrigins[2].m_Origin[1] == 0);
1618     CHECK(queueDescriptor.m_ViewOrigins[0].m_Origin[2] == 0);
1619     CHECK(queueDescriptor.m_ViewOrigins[1].m_Origin[2] == 0);
1620     CHECK(queueDescriptor.m_ViewOrigins[2].m_Origin[2] == 0);
1621 
1622     // Returns so we can do extra, backend-specific tests.
1623     return workload;
1624 }
1625 
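// With a {5, 7, 7} parent tensor and view origins 0, 1 and 3 along dimension 0,
// the three views cover rows [0,1), [1,3) and [3,5), which is why the outputs are
// declared as {1, 7, 7}, {2, 7, 7} and {2, 7, 7}.
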
1626 /// This function constructs a graph with both a splitter and a concat, and returns a pair of the workloads.
1627 template<typename SplitterWorkload, typename ConcatWorkload, armnn::DataType DataType>
1628 std::pair<std::unique_ptr<SplitterWorkload>, std::unique_ptr<ConcatWorkload>>
1629     CreateSplitterConcatWorkloadTest(armnn::IWorkloadFactory &factory, armnn::Graph &graph)
1630 {
1631     armnn::TensorInfo inputTensorInfo({ 1, 2, 100, 10 }, DataType);
1632 
1633     armnn::TensorInfo splitTensorInfo1({ 1, 1, 100, 10 }, DataType);
1634     armnn::TensorInfo splitTensorInfo2({ 1, 1, 100, 10 }, DataType);
1635 
1636     // Constructs the graph.
1637     Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1638 
1639     armnn::ViewsDescriptor splitterViews(2);
1640     splitterViews.SetViewOriginCoord(0, 0, 0);
1641     splitterViews.SetViewOriginCoord(0, 1, 0);
1642     splitterViews.SetViewOriginCoord(0, 2, 0);
1643     splitterViews.SetViewOriginCoord(0, 3, 0);
1644 
1645     splitterViews.SetViewOriginCoord(1, 0, 0);
1646     splitterViews.SetViewOriginCoord(1, 1, 1);
1647     splitterViews.SetViewOriginCoord(1, 2, 0);
1648     splitterViews.SetViewOriginCoord(1, 3, 0);
1649 
1650     // create splitter layer
1651     Layer* const splitter = graph.AddLayer<SplitterLayer>(splitterViews, "splitter");
1652     CHECK(splitter);
1653 
1654     armnn::OriginsDescriptor concatViews(2);
1655     concatViews.SetViewOriginCoord(0, 0, 0);
1656     concatViews.SetViewOriginCoord(0, 1, 1);
1657     concatViews.SetViewOriginCoord(0, 2, 0);
1658     concatViews.SetViewOriginCoord(0, 3, 0);
1659 
1660     concatViews.SetViewOriginCoord(1, 0, 0);
1661     concatViews.SetViewOriginCoord(1, 1, 0);
1662     concatViews.SetViewOriginCoord(1, 2, 0);
1663     concatViews.SetViewOriginCoord(1, 3, 0);
1664 
1665     // create concat layer
1666     Layer* const concat = graph.AddLayer<ConcatLayer>(concatViews, "concat");
1667     CHECK(concat);
1668 
1669     Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
1670 
1671     // Adds connections.
1672     // connect input to splitter
1673     Connect(input, splitter, inputTensorInfo, 0, 0);
1674     // connect splitter[0] to concat[1]
1675     Connect(splitter, concat, splitTensorInfo1, 0, 1); // The splitter & concat are connected up.
1676     // connect splitter[1] to concat[0]
1677     Connect(splitter, concat, splitTensorInfo2, 1, 0); // So that the outputs are flipped round.
1678     // connect concat to output
1679     Connect(concat, output, inputTensorInfo, 0, 0);
1680 
1681     // Creates the tensor handles.
1682     CreateTensorHandles(graph, factory);
1683 
1684     // Creates the splitter workload.
1685     auto workloadSplitter = MakeAndCheckWorkload<SplitterWorkload>(*splitter, factory);
1686     CHECK(workloadSplitter);
1687     // Creates the concat workload.
1688     auto workloadConcat = MakeAndCheckWorkload<ConcatWorkload>(*concat, factory);
1689     CHECK(workloadConcat);
1690 
1691     return {std::move(workloadSplitter), std::move(workloadConcat)};
1692 }
1693 
1694 
1695 /// This function constructs a graph with a splitter with two outputs. Each of the outputs is then
1696 /// connected to two different activation layers.
1697 template<typename SplitterWorkload, typename ActivationWorkload, armnn::DataType DataType>
1698 void CreateSplitterMultipleInputsOneOutputWorkloadTest(armnn::IWorkloadFactory& factory, armnn::Graph& graph,
1699                                  std::unique_ptr<SplitterWorkload>& wlSplitter,
1700                                  std::unique_ptr<ActivationWorkload>& wlActiv0_0,
1701                                  std::unique_ptr<ActivationWorkload>& wlActiv0_1,
1702                                  std::unique_ptr<ActivationWorkload>& wlActiv1_0,
1703                                  std::unique_ptr<ActivationWorkload>& wlActiv1_1)
1704 {
1705     armnn::TensorInfo inputTensorInfo ({ 1, 3, 100, 50 }, DataType);
1706     armnn::TensorInfo splitTensorInfo1({ 1, 1, 100, 50 }, DataType);
1707     armnn::TensorInfo splitTensorInfo2({ 1, 2, 100, 50 }, DataType);
1708 
1709     // Constructs the graph.
1710     Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1711 
1712     armnn::ViewsDescriptor splitterViews(2);
1713 
1714     splitterViews.SetViewOriginCoord(0, 0, 0);
1715     splitterViews.SetViewOriginCoord(0, 1, 0);
1716     splitterViews.SetViewOriginCoord(0, 2, 0);
1717     splitterViews.SetViewOriginCoord(0, 3, 0);
1718 
1719     splitterViews.SetViewOriginCoord(1, 0, 0);
1720     splitterViews.SetViewOriginCoord(1, 1, 1);
1721     splitterViews.SetViewOriginCoord(1, 2, 0);
1722     splitterViews.SetViewOriginCoord(1, 3, 0);
1723 
1724     Layer* const splitter = graph.AddLayer<SplitterLayer>(splitterViews, "splitter");
1725 
1726     armnn::ActivationDescriptor activationDesc;
1727 
1728     Layer* const activ0_0 = graph.AddLayer<ActivationLayer>(activationDesc, "activ0_0");
1729     Layer* const activ0_1 = graph.AddLayer<ActivationLayer>(activationDesc, "activ0_1");
1730     Layer* const activ1_0 = graph.AddLayer<ActivationLayer>(activationDesc, "activ1_0");
1731     Layer* const activ1_1 = graph.AddLayer<ActivationLayer>(activationDesc, "activ1_1");
1732 
1733     Layer* const output1 = graph.AddLayer<OutputLayer>(1, "output1");
1734     Layer* const output2 = graph.AddLayer<OutputLayer>(2, "output2");
1735     Layer* const output3 = graph.AddLayer<OutputLayer>(3, "output3");
1736     Layer* const output4 = graph.AddLayer<OutputLayer>(4, "output4");
1737 
1738     // Adds connections.
1739     Connect(input, splitter, inputTensorInfo, 0, 0);
1740     Connect(splitter, activ0_0, splitTensorInfo1, 0, 0);
1741     Connect(splitter, activ0_1, splitTensorInfo1, 0, 0);
1742 
1743     Connect(splitter, activ1_0, splitTensorInfo2, 1, 0);
1744     Connect(splitter, activ1_1, splitTensorInfo2, 1, 0);
1745 
1746     Connect(activ0_0, output1, splitTensorInfo1, 0, 0);
1747     Connect(activ0_1, output2, splitTensorInfo1, 0, 0);
1748     Connect(activ1_0, output3, splitTensorInfo2, 0, 0);
1749     Connect(activ1_1, output4, splitTensorInfo2, 0, 0);
1750 
1751     CreateTensorHandles(graph, factory);
1752 
1753     auto workloadSplitter = MakeAndCheckWorkload<SplitterWorkload>(*splitter, factory);
1754     auto workloadActiv0_0 = MakeAndCheckWorkload<ActivationWorkload>(*activ0_0, factory);
1755     auto workloadActiv0_1 = MakeAndCheckWorkload<ActivationWorkload>(*activ0_1, factory);
1756     auto workloadActiv1_0 = MakeAndCheckWorkload<ActivationWorkload>(*activ1_0, factory);
1757     auto workloadActiv1_1 = MakeAndCheckWorkload<ActivationWorkload>(*activ1_1, factory);
1758 
1759     wlSplitter = std::move(workloadSplitter);
1760     wlActiv0_0 = std::move(workloadActiv0_0);
1761     wlActiv0_1 = std::move(workloadActiv0_1);
1762     wlActiv1_0 = std::move(workloadActiv1_0);
1763     wlActiv1_1 = std::move(workloadActiv1_1);
1764 }
1765 
1766 template <typename ResizeWorkload, armnn::DataType DataType>
1767 std::unique_ptr<ResizeWorkload> CreateResizeBilinearWorkloadTest(armnn::IWorkloadFactory& factory,
1768                                                                  armnn::Graph& graph,
1769                                                                  DataLayout dataLayout = DataLayout::NCHW)
1770 {
1771     TensorShape inputShape;
1772     TensorShape outputShape;
1773 
1774     switch (dataLayout) {
1775         case DataLayout::NHWC:
1776             inputShape =  { 2, 4, 4, 3 };
1777             outputShape = { 2, 2, 2, 3 };
1778             break;
1779         case DataLayout::NCHW:
1780         default:
1781             inputShape =  { 2, 3, 4, 4 };
1782             outputShape = { 2, 3, 2, 2 };
1783     }
1784 
1785     // Creates the layer we're testing.
1786     ResizeDescriptor resizeDesc;
1787     armnnUtils::DataLayoutIndexed dimensionIndices = dataLayout;
1788     resizeDesc.m_Method       = ResizeMethod::Bilinear;
1789     resizeDesc.m_TargetWidth  = outputShape[dimensionIndices.GetWidthIndex()];
1790     resizeDesc.m_TargetHeight = outputShape[dimensionIndices.GetHeightIndex()];
1791     resizeDesc.m_DataLayout   = dataLayout;
1792     Layer* const layer = graph.AddLayer<ResizeLayer>(resizeDesc, "resize");
1793 
1794     // Creates extra layers.
1795     Layer* const input  = graph.AddLayer<InputLayer>(0, "input");
1796     Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
1797 
1798     // Connects up.
1799     armnn::TensorInfo inputTensorInfo(inputShape, DataType);
1800     armnn::TensorInfo outputTensorInfo(outputShape, DataType);
1801     Connect(input, layer, inputTensorInfo);
1802     Connect(layer, output, outputTensorInfo);
1803     CreateTensorHandles(graph, factory);
1804 
1805     // Makes the workload and checks it.
1806     auto workload = MakeAndCheckWorkload<ResizeWorkload>(*layer, factory);
1807 
1808     auto queueDescriptor = workload->GetData();
1809     CHECK(queueDescriptor.m_Inputs.size()  == 1);
1810     CHECK(queueDescriptor.m_Outputs.size() == 1);
1811     CHECK(queueDescriptor.m_Parameters.m_DataLayout == dataLayout);
1812 
1813     // Returns so we can do extra, backend-specific tests.
1814     return workload;
1815 }
1816 
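// DataLayoutIndexed abstracts the dimension positions used above: for NCHW the
// height/width indices are 2/3, for NHWC they are 1/2, so m_TargetWidth and
// m_TargetHeight are read from the correct positions of outputShape either way.
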
1817 template <typename BatchToSpaceNdWorkload, armnn::DataType DataType>
1818 std::unique_ptr<BatchToSpaceNdWorkload> CreateBatchToSpaceNdWorkloadTest(armnn::IWorkloadFactory& factory,
1819                                                                          armnn::Graph&  graph)
1820 {
1821     BatchToSpaceNdDescriptor desc;
1822     Layer* const layer = graph.AddLayer<BatchToSpaceNdLayer>(desc, "batchToSpace");
1823 
1824     // Creates extra layers.
1825     Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1826     Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
1827 
1828     // Connects up.
1829     armnn::TensorInfo tensorInfo({1, 1, 1, 1}, DataType);
1830 
1831     Connect(input, layer, tensorInfo);
1832     Connect(layer, output, tensorInfo);
1833 
1834     CreateTensorHandles(graph, factory);
1835 
1836     // Makes the workload and checks it.
1837     auto workload = MakeAndCheckWorkload<BatchToSpaceNdWorkload>(*layer, factory);
1838 
1839     BatchToSpaceNdQueueDescriptor queueDescriptor = workload->GetData();
1840     CHECK(queueDescriptor.m_Inputs.size() == 1);
1841     CHECK(queueDescriptor.m_Outputs.size() == 1);
1842 
1843     return workload;
1844 }
1845 
1846 template <typename LogSoftmaxWorkload, armnn::DataType DataType>
1847 std::unique_ptr<LogSoftmaxWorkload> CreateLogSoftmaxWorkloadTest(armnn::IWorkloadFactory& factory,
1848                                                                  armnn::Graph& graph)
1849 {
1850     // Create the layer we're testing.
1851     LogSoftmaxDescriptor logSoftmaxDescriptor;
1852     // Set the axis to -1 for CL and Neon until additional axes are supported.
1853     if (factory.GetBackendId() == armnn::Compute::CpuAcc || factory.GetBackendId() == armnn::Compute::GpuAcc)
1854     {
1855         logSoftmaxDescriptor.m_Axis = -1;
1856     }
1857 
1858     Layer* const layer = graph.AddLayer<LogSoftmaxLayer>(logSoftmaxDescriptor, "layer");
1859     // Create extra layers.
1860     Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1861     Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
1862 
1863     // Connect up
1864     armnn::TensorInfo tensorInfo({4, 1}, DataType);
1865 
1866     Connect(input, layer, tensorInfo);
1867     Connect(layer, output, tensorInfo);
1868     CreateTensorHandles(graph, factory);
1869 
1870     // Makes the workload and checks it.
1871     auto workload = MakeAndCheckWorkload<LogSoftmaxWorkload>(*layer, factory);
1872 
1873     LogSoftmaxQueueDescriptor queueDescriptor = workload->GetData();
1874     CHECK(queueDescriptor.m_Inputs.size() == 1);
1875     CHECK(queueDescriptor.m_Outputs.size() == 1);
1876 
1877     // Return so we can do extra, backend-specific tests.
1878     return workload;
1879 }
1880 
1881 template <typename L2NormalizationWorkload, armnn::DataType DataType>
1882 std::unique_ptr<L2NormalizationWorkload> CreateL2NormalizationWorkloadTest(armnn::IWorkloadFactory& factory,
1883     armnn::Graph& graph, DataLayout dataLayout = DataLayout::NCHW)
1884 {
1885     // Creates the layer we're testing.
1886     L2NormalizationDescriptor layerDesc;
1887     layerDesc.m_DataLayout = dataLayout;
1888 
1889     Layer* const layer = graph.AddLayer<L2NormalizationLayer>(layerDesc, "l2norm");
1890 
1891     // Creates extra layers.
1892     Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1893     Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
1894 
1895     TensorShape inputShape = (dataLayout == DataLayout::NCHW) ?
1896                 TensorShape{ 5, 20, 50, 67 } : TensorShape{ 5, 50, 67, 20 };
1897     TensorShape outputShape = (dataLayout == DataLayout::NCHW) ?
1898                 TensorShape{ 5, 20, 50, 67 } : TensorShape{ 5, 50, 67, 20 };
1899 
1900     // Connects up.
1901     armnn::TensorInfo inputTensorInfo(inputShape, DataType);
1902     armnn::TensorInfo outputTensorInfo(outputShape, DataType);
1903     Connect(input, layer, inputTensorInfo);
1904     Connect(layer, output, outputTensorInfo);
1905     CreateTensorHandles(graph, factory);
1906 
1907     // Makes the workload and checks it.
1908     auto workload = MakeAndCheckWorkload<L2NormalizationWorkload>(*layer, factory);
1909 
1910     L2NormalizationQueueDescriptor queueDescriptor = workload->GetData();
1911     CHECK((queueDescriptor.m_Parameters.m_DataLayout == dataLayout));
1912     CHECK(queueDescriptor.m_Inputs.size() == 1);
1913     CHECK(queueDescriptor.m_Outputs.size() == 1);
1914 
1915     // Returns so we can do extra, backend-specific tests.
1916     return workload;
1917 }
1918 
1919 template <typename ReshapeWorkload, armnn::DataType DataType>
1920 std::unique_ptr<ReshapeWorkload> CreateReshapeWorkloadTest(armnn::IWorkloadFactory& factory,
1921     armnn::Graph& graph)
1922 {
1923     // Creates the layer we're testing.
1924     TensorShape outputShape({ 1, 4 });
1925     ReshapeDescriptor reshapeDesc;
1926     reshapeDesc.m_TargetShape = outputShape;
1927     Layer* const layer = graph.AddLayer<ReshapeLayer>(reshapeDesc, "layer");
1928 
1929     // Creates extra layers.
1930     Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1931     Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
1932 
1933     // Connects up.
1934     armnn::TensorInfo inputTensorInfo({ 4, 1 }, DataType);
1935     armnn::TensorInfo outputTensorInfo(outputShape, DataType);
1936     Connect(input, layer, inputTensorInfo);
1937     Connect(layer, output, outputTensorInfo);
1938     CreateTensorHandles(graph, factory);
1939 
1940     // Makes the workload and checks it.
1941     auto workload = MakeAndCheckWorkload<ReshapeWorkload>(*layer, factory);
1942 
1943     ReshapeQueueDescriptor queueDescriptor = workload->GetData();
1944     CHECK(queueDescriptor.m_Inputs.size() == 1);
1945     CHECK(queueDescriptor.m_Outputs.size() == 1);
1946 
1947     // Returns so we can do extra, backend-specific tests.
1948     return workload;
1949 }
1950 
1951 template <typename ConvertFp16ToFp32Float32Workload>
1952 std::unique_ptr<ConvertFp16ToFp32Float32Workload> CreateConvertFp16ToFp32WorkloadTest(
1953     armnn::IWorkloadFactory& factory, armnn::Graph& graph)
1954 {
1955     // Creates the layer we're testing.
1956     ConvertFp16ToFp32Layer* const layer = graph.AddLayer<ConvertFp16ToFp32Layer>("Fp16ToFp32Converter");
1957 
1958     // Creates extra layers.
1959     Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1960     Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
1961 
1962     // Connects up.
1963     armnn::TensorInfo inputTensorInfo({1, 3, 2, 3}, armnn::DataType::Float16);
1964     armnn::TensorInfo outputTensorInfo({1, 3, 2, 3}, armnn::DataType::Float32);
1965     Connect(input, layer, inputTensorInfo);
1966     Connect(layer, output, outputTensorInfo);
1967     CreateTensorHandles(graph, factory);
1968 
1969     // Makes the workload and checks it.
1970     auto workload = MakeAndCheckWorkload<ConvertFp16ToFp32Float32Workload>(*layer, factory);
1971 
1972     ConvertFp16ToFp32QueueDescriptor queueDescriptor = workload->GetData();
1973     CHECK(queueDescriptor.m_Inputs.size() == 1);
1974     CHECK(queueDescriptor.m_Outputs.size() == 1);
1975 
1976     // Returns so we can do extra, backend-specific tests.
1977     return workload;
1978 }
1979 
1980 template <typename ConvertFp32ToFp16Float16Workload>
1981 std::unique_ptr<ConvertFp32ToFp16Float16Workload> CreateConvertFp32ToFp16WorkloadTest(
1982     armnn::IWorkloadFactory& factory, armnn::Graph& graph)
1983 {
1984     // Creates the layer we're testing.
1985     ConvertFp32ToFp16Layer* const layer = graph.AddLayer<ConvertFp32ToFp16Layer>("Fp32ToFp16Converter");
1986 
1987     // Creates extra layers.
1988     Layer* const input = graph.AddLayer<InputLayer>(0, "input");
1989     Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
1990 
1991     // Connects up.
1992     armnn::TensorInfo inputTensorInfo({1, 3, 2, 3}, armnn::DataType::Float32);
1993     armnn::TensorInfo outputTensorInfo({1, 3, 2, 3}, armnn::DataType::Float16);
1994     Connect(input, layer, inputTensorInfo);
1995     Connect(layer, output, outputTensorInfo);
1996     CreateTensorHandles(graph, factory);
1997 
1998     // Makes the workload and checks it.
1999     auto workload = MakeAndCheckWorkload<ConvertFp32ToFp16Float16Workload>(*layer, factory);
2000 
2001     ConvertFp32ToFp16QueueDescriptor queueDescriptor = workload->GetData();
2002     CHECK(queueDescriptor.m_Inputs.size() == 1);
2003     CHECK(queueDescriptor.m_Outputs.size() == 1);
2004 
2005     // Returns so we can do extra, backend-specific tests.
2006     return workload;
2007 }
2008 
2009 template <typename MeanWorkload, armnn::DataType DataType>
2010 std::unique_ptr<MeanWorkload> CreateMeanWorkloadTest(armnn::IWorkloadFactory& factory, armnn::Graph& graph)
2011 {
2012     // Reduces over axes 1 and 2 (the second and third dimensions), without keeping them.
2013     MeanDescriptor descriptor({ 1, 2 }, false);
2014 
2015     // Creates the layer we're testing.
2016     Layer* const layer = graph.AddLayer<MeanLayer>(descriptor, "mean");
2017 
2018     // Creates extra layers.
2019     Layer* const input = graph.AddLayer<InputLayer>(0, "input");
2020     Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
2021 
2022     // Connects up.
2023     armnn::TensorInfo inputTensorInfo({ 1, 3, 7, 4 }, DataType);
2024     armnn::TensorInfo outputTensorInfo({ 1, 4 }, DataType);
2025     Connect(input, layer, inputTensorInfo);
2026     Connect(layer, output, outputTensorInfo);
2027     CreateTensorHandles(graph, factory);
2028 
2029     // Makes the workload and checks it.
2030     auto workload = MakeAndCheckWorkload<MeanWorkload>(*layer, factory);
2031 
2032     MeanQueueDescriptor queueDescriptor = workload->GetData();
2033     CHECK(queueDescriptor.m_Parameters.m_Axis == descriptor.m_Axis);
2034     CHECK(queueDescriptor.m_Parameters.m_KeepDims == descriptor.m_KeepDims);
2035     CHECK(queueDescriptor.m_Inputs.size() == 1);
2036     CHECK(queueDescriptor.m_Outputs.size() == 1);
2037 
2038     // Returns so we can do extra, backend-specific tests.
2039     return workload;
2040 }
2041 
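// Worked shapes for the test above: reducing {1, 3, 7, 4} over axes {1, 2} with
// keepDims == false drops those dimensions entirely, leaving {1, 4}.
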
2042 template<typename ConcatWorkload, armnn::DataType DataType>
2043 std::unique_ptr<ConcatWorkload> CreateConcatWorkloadTest(armnn::IWorkloadFactory &factory,
2044                                                          armnn::Graph &graph,
2045                                                          const armnn::TensorShape &outputShape,
2046                                                          unsigned int concatAxis)
2047 {
2048     armnn::TensorInfo inputTensorInfo({ 2, 3, 2, 5 }, DataType);
2049     armnn::TensorInfo outputTensorInfo(outputShape, DataType);
2050 
2051     // Constructs the graph.
2052     Layer* const input0 = graph.AddLayer<InputLayer>(0, "input0");
2053     Layer* const input1 = graph.AddLayer<InputLayer>(1, "input1");
2054     armnn::OriginsDescriptor descriptor;
2055 
2056     std::vector<armnn::TensorShape> inputShapes{{ 2, 3, 2, 5 }, { 2, 3, 2, 5 }};
2057 
2058     descriptor = CreateDescriptorForConcatenation(inputShapes.begin(),
2059                                                   inputShapes.end(),
2060                                                   concatAxis);
2061 
2062     // create concat layer
2063     Layer* const concat = graph.AddLayer<ConcatLayer>(descriptor, "concat");
2064     CHECK(concat);
2065 
2066     Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
2067 
2068     // Adds connections.
2069     // connect input0 to concat
2070     Connect(input0, concat, inputTensorInfo, 0, 0);
2071     // connect input1 to concat
2072     Connect(input1, concat, inputTensorInfo, 0, 1);
2073     // connect concat to output
2074     Connect(concat, output, outputTensorInfo, 0, 0);
2075 
2076     // create tensor handles
2077     CreateTensorHandles(graph, factory);
2078 
2079     // create concat workload
2080     auto workloadConcat = MakeAndCheckWorkload<ConcatWorkload>(*concat, factory);
2081     CHECK(workloadConcat);
2082 
2083     return workloadConcat;
2084 }
2085 
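// For the two {2, 3, 2, 5} inputs above, CreateDescriptorForConcatenation stacks
// the views along concatAxis; e.g. with concatAxis == 1 the origins are
// {0, 0, 0, 0} and {0, 3, 0, 0} and the matching outputShape is {2, 6, 2, 5}.
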
2086 template <typename PreCompiledWorkload, armnn::DataType dataType>
2087 std::pair<armnn::IOptimizedNetworkPtr, std::unique_ptr<PreCompiledWorkload>> CreatePreCompiledWorkloadTest(
2088     armnn::IWorkloadFactory& factory,
2089     armnn::Graph& graph,
2090     bool biasEnabled = false)
2091 {
2092     IgnoreUnused(graph);
2093 
2094     // build up the structure of the network
2095     armnn::INetworkPtr net(armnn::INetwork::Create());
2096 
2097     // Add an input layer
2098     armnn::IConnectableLayer* const inputLayer = net->AddInputLayer(0, "input layer");
2099     CHECK(inputLayer);
2100 
2101     // ArmNN weights tensor shape is OIHW (out channels, in channels, height, width) for NCHW
2102     // ArmNN weights tensor shape is OHWI (out channels, height, width, in channels) for NHWC
2103     // this test is using NHWC, so the weights shape is OHWI
2104     TensorInfo weightsTensorInfo(TensorShape({16, 1, 1, 16}), dataType, 0.9f, 0, true);
2105     unsigned int weightsLength = weightsTensorInfo.GetNumElements();
2106 
2107     using WeightType = armnn::ResolveType<dataType>;
2108     std::vector<WeightType> convWeightsData(weightsLength);
2109     for (unsigned int i = 0; i < weightsLength; ++i)
2110     {
2111         convWeightsData[i] = static_cast<WeightType>(i);
2112     }
2113 
2114     armnn::ConstTensor weights(weightsTensorInfo, convWeightsData);
2115 
2116     // Add a layer that can be used in the PreCompiled layer
2117     armnn::Convolution2dDescriptor convDesc2d;
2118     convDesc2d.m_StrideX = 1;
2119     convDesc2d.m_StrideY = 1;
2120     convDesc2d.m_BiasEnabled = biasEnabled;
2121     convDesc2d.m_DataLayout = armnn::DataLayout::NHWC;
2122 
2123 
2124     const std::string convLayerName("conv layer");
2125 
2126     armnn::IConnectableLayer* convLayer = net->AddConvolution2dLayer(convDesc2d, convLayerName.c_str());
2127 
2128     IConnectableLayer* weightsLayer = net->AddConstantLayer(weights);
2129     weightsLayer->GetOutputSlot(0).SetTensorInfo(weights.GetInfo());
2130     weightsLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(1u));
2131 
2132     if (biasEnabled)
2133     {
2134         constexpr armnn::DataType biasDataType = (dataType == armnn::DataType::QAsymmU8) ?
2135             armnn::DataType::Signed32 : armnn::DataType::Float32;
2136 
2137         TensorInfo biasTensorInfo(TensorShape({16}), biasDataType, 0.9f * 0.9f, 0, true);
2138         unsigned int biasLength = biasTensorInfo.GetNumElements();
2139 
2140         using BiasType = armnn::ResolveType<biasDataType>;
2141         std::vector<BiasType> biasData(biasLength);
2142         std::fill(biasData.begin(), biasData.end(), static_cast<BiasType>(0));
2143 
2144         armnn::ConstTensor biases(biasTensorInfo, biasData);
2145 
2146         IConnectableLayer* biasLayer = net->AddConstantLayer(biases);
2147 
2148         biasLayer->GetOutputSlot(0).SetTensorInfo(biases.GetInfo());
2149         biasLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(2u));
2150     }
2151 
2152     CHECK(convLayer);
2153 
2154     // Add an output layer
2155     armnn::IConnectableLayer* const outputLayer = net->AddOutputLayer(0, "output layer");
2156     CHECK(outputLayer);
2157 
2158     // set the tensors in the network (NHWC format)
2159     TensorInfo inputTensorInfo(TensorShape({ 1, 16, 16, 16 }), dataType);
2160     if (dataType == armnn::DataType::QAsymmU8)
2161     {
2162         inputTensorInfo.SetQuantizationOffset(0);
2163         inputTensorInfo.SetQuantizationScale(0.9f);
2164     }
2165 
2166     TensorInfo outputTensorInfo(TensorShape({1, 16, 16, 16}), dataType);
2167     if (dataType == armnn::DataType::QAsymmU8)
2168     {
2169         outputTensorInfo.SetQuantizationOffset(0);
2170         outputTensorInfo.SetQuantizationScale(0.9f);
2171     }
2172 
2173     // Connect the layers
2174     inputLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(0));
2175     inputLayer->GetOutputSlot(0).SetTensorInfo(inputTensorInfo);
2176 
2177     convLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
2178     convLayer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
2179 
2180     // Optimize the network for the backend supported by the factory
2181     std::vector<armnn::BackendId> backends = {factory.GetBackendId()};
2182     armnn::IRuntime::CreationOptions options;
2183     armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
2184     armnn::OptimizerOptionsOpaque optimizerOptions;
2185     armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec(),
2186                                                                optimizerOptions);
2187     CHECK(optimizedNet != nullptr);
2188 
2189     // Find the PreCompiled layer in the optimised graph
2190     armnn::Graph& optimisedGraph = GetGraphForTesting(optimizedNet.get());
2191     Layer* preCompiledLayer = nullptr;
2192     for (auto& layer : optimisedGraph)
2193     {
2194         if (layer->GetType() == LayerType::PreCompiled)
2195         {
2196             preCompiledLayer = layer;
2197         }
2198     }
2199     CHECK(preCompiledLayer != nullptr);
2200 
2201     // Create the TensorHandles.
2202     CreateTensorHandles(optimisedGraph, factory);
2203 
2204     // Make the workload and check it.
2205     auto workload = MakeAndCheckWorkload<PreCompiledWorkload>(*preCompiledLayer, factory);
2206 
2207     PreCompiledQueueDescriptor queueDescriptor = workload->GetData();
2208     CHECK(queueDescriptor.m_Inputs.size()  == 1);
2209     CHECK(queueDescriptor.m_Outputs.size() == 1);
2210 
2211     // Returns the workload so we can do extra, backend-specific tests.
2212     // NOTE: We need to return the optimised network as well, otherwise it goes
2213     // out of scope and the tensor handles are destroyed.
2214     return std::make_pair(std::move(optimizedNet), std::move(workload));
2215 }
2216 
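// A sketch of the caller's side (the workload and data types are backend-specific
// stand-ins, not names defined in this file):
//
//     auto result = CreatePreCompiledWorkloadTest<MyPreCompiledWorkload,
//                                                 armnn::DataType::QAsymmU8>(factory, graph);
//     auto& workload = result.second;
//     // result.first (the optimized network) must stay alive while workload is used.
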
2217 template<typename ConstantWorkload, armnn::DataType DataType>
2218 std::unique_ptr<ConstantWorkload> CreateConstantWorkloadTest(armnn::IWorkloadFactory& factory,
2219                                                              armnn::Graph& graph,
2220                                                              const armnn::TensorShape& outputShape)
2221 {
2222     armnn::TensorInfo outputTensorInfo(outputShape, DataType);
2223 
2224     // create constant layer
2225     auto constant = graph.AddLayer<ConstantLayer>("constant");
2226     CHECK(constant);
2227     constant->m_LayerOutput = std::make_unique<ScopedTensorHandle>(outputTensorInfo);
2228 
2229     Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
2230 
2231     // Adds connections.
2232     // connect constant to output
2233     Connect(constant, output, outputTensorInfo, 0, 0);
2234 
2235     // create tensor handles
2236     CreateTensorHandles(graph, factory);
2237 
2238     // create Constant workload"
2239     auto workloadConstant = MakeAndCheckWorkload<ConstantWorkload>(*constant, factory);
2240     CHECK(workloadConstant);
2241 
2242     return workloadConstant;
2243 }
2244 
2245 template <typename PreluWorkload>
2246 std::unique_ptr<PreluWorkload> CreatePreluWorkloadTest(armnn::IWorkloadFactory& factory,
2247                                                        armnn::Graph& graph,
2248                                                        const armnn::TensorShape& inputShape,
2249                                                        const armnn::TensorShape& alphaShape,
2250                                                        const armnn::TensorShape& outputShape,
2251                                                        armnn::DataType dataType)
2252 {
2253     // Creates the PReLU layer
2254     Layer* const layer = graph.AddLayer<PreluLayer>("prelu");
2255     CHECK(layer != nullptr);
2256 
2257     // Creates extra layers
2258     Layer* const input  = graph.AddLayer<InputLayer> (0, "input");
2259     Layer* const alpha  = graph.AddLayer<InputLayer> (1, "alpha");
2260     Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
2261     CHECK(input  != nullptr);
2262     CHECK(alpha  != nullptr);
2263     CHECK(output != nullptr);
2264 
2265     // Connects up
2266     armnn::TensorInfo inputTensorInfo (inputShape,  dataType);
2267     armnn::TensorInfo alphaTensorInfo (alphaShape,  dataType);
2268     armnn::TensorInfo outputTensorInfo(outputShape, dataType);
2269     Connect(input, layer,  inputTensorInfo,  0, 0);
2270     Connect(alpha, layer,  alphaTensorInfo,  0, 1);
2271     Connect(layer, output, outputTensorInfo, 0, 0);
2272     CreateTensorHandles(graph, factory);
2273 
2274     // Makes the workload and checks it
2275     auto workload = MakeAndCheckWorkload<PreluWorkload>(*layer, factory);
2276 
2277     PreluQueueDescriptor queueDescriptor = workload->GetData();
2278     CHECK(queueDescriptor.m_Inputs.size() == 2);
2279     CHECK(queueDescriptor.m_Outputs.size() == 1);
2280 
2281     // Returns so we can do extra, backend-specific tests.
2282     return workload;
2283 }
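
// A minimal sketch of invoking the helper above (RefPreluWorkload is an
// assumed workload type). The alpha shape must be broadcastable against the
// input shape:
//
//     armnn::Graph graph;
//     armnn::RefWorkloadFactory factory;
//     auto workload = CreatePreluWorkloadTest<armnn::RefPreluWorkload>(
//         factory, graph,
//         { 1, 4, 1, 2 },    // input
//         { 5, 4, 3, 1 },    // alpha
//         { 5, 4, 3, 2 },    // output (per-dimension max of the two)
//         armnn::DataType::Float32);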

template <typename SpaceToDepthWorkload, armnn::DataType DataType>
std::unique_ptr<SpaceToDepthWorkload> CreateSpaceToDepthWorkloadTest(armnn::IWorkloadFactory& factory,
                                                                     armnn::Graph&  graph)
{
    SpaceToDepthDescriptor desc;
    desc.m_BlockSize = 2;
    Layer* const layer = graph.AddLayer<SpaceToDepthLayer>(desc, "spaceToDepth");

    // Creates extra layers.
    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");

    // Connects up. The shapes assume the descriptor's default NHWC data layout:
    // each 2x2 spatial block is folded into the channel dimension.
    armnn::TensorInfo inputTensorInfo({ 1, 2, 2, 1 }, DataType);
    armnn::TensorInfo outputTensorInfo({ 1, 1, 1, 4 }, DataType);

    Connect(input, layer, inputTensorInfo);
    Connect(layer, output, outputTensorInfo);

    CreateTensorHandles(graph, factory);

    // Makes the workload and checks it.
    auto workload = MakeAndCheckWorkload<SpaceToDepthWorkload>(*layer, factory);

    SpaceToDepthQueueDescriptor queueDescriptor = workload->GetData();
    CHECK(queueDescriptor.m_Inputs.size() == 1);
    CHECK(queueDescriptor.m_Outputs.size() == 1);

    return workload;
}
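
// A minimal sketch of invoking the helper above (RefSpaceToDepthWorkload is an
// assumed workload type; the tensor shapes are fixed inside the helper):
//
//     armnn::Graph graph;
//     armnn::RefWorkloadFactory factory;
//     auto workload = CreateSpaceToDepthWorkloadTest<armnn::RefSpaceToDepthWorkload,
//                                                    armnn::DataType::Float32>(factory, graph);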

template <typename StackWorkload, armnn::DataType DataType>
std::unique_ptr<StackWorkload> CreateStackWorkloadTest(armnn::IWorkloadFactory& factory,
                                                       armnn::Graph& graph,
                                                       const armnn::TensorShape& inputShape,
                                                       const armnn::TensorShape& outputShape,
                                                       unsigned int axis,
                                                       unsigned int numInputs)
{
    armnn::TensorInfo inputTensorInfo(inputShape, DataType);
    armnn::TensorInfo outputTensorInfo(outputShape, DataType);

    // Constructs the Stack layer.
    armnn::StackDescriptor descriptor(axis, numInputs, inputShape);
    Layer* const stackLayer = graph.AddLayer<StackLayer>(descriptor, "stack");
    CHECK(stackLayer != nullptr);

    // Constructs the layer inputs and output.
    std::vector<Layer*> inputs;
    for (unsigned int i = 0; i < numInputs; ++i)
    {
        inputs.push_back(graph.AddLayer<InputLayer>(
            static_cast<int>(i),
            ("input" + std::to_string(i)).c_str()
        ));
        CHECK(inputs[i] != nullptr);
    }
    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
    CHECK(output != nullptr);

    // Adds connections.
    for (unsigned int i = 0; i < numInputs; ++i)
    {
        Connect(inputs[i], stackLayer, inputTensorInfo, 0, i);
    }
    Connect(stackLayer, output, outputTensorInfo, 0, 0);

    CreateTensorHandles(graph, factory);

    // Makes the workload and checks it.
    auto stackWorkload = MakeAndCheckWorkload<StackWorkload>(*stackLayer, factory);
    StackQueueDescriptor queueDescriptor = stackWorkload->GetData();
    CHECK(queueDescriptor.m_Inputs.size() == numInputs);
    CHECK(queueDescriptor.m_Outputs.size() == 1);

    return stackWorkload;
}
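
// A minimal sketch of invoking the helper above (RefStackWorkload is an
// assumed workload type). Stacking two 3x4x5 inputs along axis 0 yields a
// 2x3x4x5 output:
//
//     armnn::Graph graph;
//     armnn::RefWorkloadFactory factory;
//     auto workload = CreateStackWorkloadTest<armnn::RefStackWorkload, armnn::DataType::Float32>(
//         factory, graph, { 3, 4, 5 }, { 2, 3, 4, 5 }, 0, 2);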

} // Anonymous namespace