1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 #pragma once
6 
7 #include <armnn/Descriptors.hpp>
8 #include <armnn/INetwork.hpp>
9 #include <armnn/IRuntime.hpp>
10 
11 #include <Profiling.hpp>
12 #include <QuantizeHelper.hpp>
13 #include <ResolveType.hpp>
14 
15 #include <boost/test/unit_test.hpp>
16 
17 #include <vector>
18 
19 namespace
20 {
21 
22 using namespace armnn;
23 
24 template<typename T>
bool ConstantUsageTest(const std::vector<BackendId>& computeDevice,
                       const TensorInfo& commonTensorInfo,
                       const std::vector<T>& inputData,
                       const std::vector<T>& constantData,
                       const std::vector<T>& expectedOutputData)
{
    // Create the runtime in which the test will run
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Build up the structure of the network
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input = net->AddInputLayer(0);
    IConnectableLayer* constant = net->AddConstantLayer(ConstTensor(commonTensorInfo, constantData));
    IConnectableLayer* add = net->AddAdditionLayer();
    IConnectableLayer* output = net->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    constant->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    add->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    // Set the tensor info for each output slot in the network
    input->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
    constant->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
    add->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);

    // Optimize the network
    IOptimizedNetworkPtr optNet = Optimize(*net, computeDevice, runtime->GetDeviceSpec());

    // Load it into the runtime
    NetworkId netId;
    runtime->LoadNetwork(netId, std::move(optNet));

    // Create structures for input & output
    std::vector<T> outputData(inputData.size());

    InputTensors inputTensors
    {
        {0, ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())}
    };
    OutputTensors outputTensors
    {
        {0, Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
    };

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Check the results
    return outputData == expectedOutputData;
}

inline bool ConstantUsageFloat32Test(const std::vector<BackendId>& backends)
{
    const TensorInfo commonTensorInfo({ 2, 3 }, DataType::Float32);

    return ConstantUsageTest(backends,
        commonTensorInfo,
        std::vector<float>{ 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }, // Input.
        std::vector<float>{ 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }, // Const input.
        std::vector<float>{ 7.f, 7.f, 7.f, 7.f, 7.f, 7.f }  // Expected output.
    );
}

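// Illustrative usage only: a backend's test suite might invoke these helpers
// from a Boost test case along these lines (the test-case name and backend
// choice are assumptions, not part of this header):
//
//   BOOST_AUTO_TEST_CASE(ConstantUsage_Ref_Float32)
//   {
//       BOOST_TEST(ConstantUsageFloat32Test({ Compute::CpuRef }));
//   }
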
inline bool ConstantUsageUint8Test(const std::vector<BackendId>& backends)
{
    TensorInfo commonTensorInfo({ 2, 3 }, DataType::QAsymmU8);

    const float scale = 0.023529f;
    const int8_t offset = -43;

    commonTensorInfo.SetQuantizationScale(scale);
    commonTensorInfo.SetQuantizationOffset(offset);

    return ConstantUsageTest(backends,
        commonTensorInfo,
        armnnUtils::QuantizedVector<uint8_t>({ 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }, scale, offset), // Input.
        armnnUtils::QuantizedVector<uint8_t>({ 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }, scale, offset), // Const input.
        armnnUtils::QuantizedVector<uint8_t>({ 7.f, 7.f, 7.f, 7.f, 7.f, 7.f }, scale, offset)  // Expected output.
    );
}

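// Assuming armnnUtils::QuantizedVector applies the usual affine mapping
// q = round(r / scale) + offset, the expected output above quantizes as
// 7.0f -> round(7.0f / 0.023529f) - 43 = 298 - 43 = 255, i.e. the chosen
// scale and offset place the result at the top of the QAsymmU8 range.
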
// Utility template for comparing tensor elements
template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
bool Compare(T a, T b, float tolerance = 0.000001f)
{
    if (ArmnnType == DataType::Boolean)
    {
        // NOTE: Boolean is represented as uint8_t (zero means false and any
        // non-zero value means true), so values need to be cast to bool
        // before comparing them
        return static_cast<bool>(a) == static_cast<bool>(b);
    }

    // NOTE: All other types can be cast to float and compared with
    // a certain level of tolerance
    return std::fabs(static_cast<float>(a) - static_cast<float>(b)) <= tolerance;
}

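// For example, Compare<DataType::Boolean>(1, 255) holds because both values
// are non-zero, while Compare<DataType::Float32>(1.0f, 1.00001f) fails the
// default tolerance of 1e-6.
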
// Utility function to count the number of occurrences of a substring within a string
int SubStringCounter(const std::string& string, const std::string& substring)
{
    std::size_t found = 0;
    int count = 0;
    // Resume each search just past the previous match
    while ((found = string.find(substring, found)) != std::string::npos)
    {
        count++;
        // Offset by the substring length to avoid counting the same match twice
        found += substring.length();
    }
    return count;
}

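// For example, if dump contains "CopyMemGeneric" twice, then
// SubStringCounter(dump, "CopyMemGeneric") returns 2. The tests below use
// this to count workload occurrences in a profiler dump.
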
template<DataType ArmnnIType, DataType ArmnnOType,
         typename TInput = ResolveType<ArmnnIType>, typename TOutput = ResolveType<ArmnnOType>>
void EndToEndLayerTestImpl(INetworkPtr network,
                           const std::map<int, std::vector<TInput>>& inputTensorData,
                           const std::map<int, std::vector<TOutput>>& expectedOutputData,
                           std::vector<BackendId> backends,
                           float tolerance = 0.000001f)
{
    // Create the runtime in which the test will run
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Optimize the network
    IOptimizedNetworkPtr optNet = Optimize(*network, backends, runtime->GetDeviceSpec());

    // Load it into the runtime
    NetworkId netId;
    runtime->LoadNetwork(netId, std::move(optNet));

    InputTensors inputTensors;
    inputTensors.reserve(inputTensorData.size());
    for (auto&& it : inputTensorData)
    {
        inputTensors.push_back({it.first,
                                ConstTensor(runtime->GetInputTensorInfo(netId, it.first), it.second.data())});
    }
    OutputTensors outputTensors;
    outputTensors.reserve(expectedOutputData.size());
    std::map<int, std::vector<TOutput>> outputStorage;
    for (auto&& it : expectedOutputData)
    {
        std::vector<TOutput> out(it.second.size());
        outputStorage.emplace(it.first, out);
        outputTensors.push_back({it.first,
                                 Tensor(runtime->GetOutputTensorInfo(netId, it.first),
                                        outputStorage.at(it.first).data())});
    }

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Check the results
    for (auto&& it : expectedOutputData)
    {
        const std::vector<TOutput>& out = outputStorage.at(it.first);
        for (unsigned int i = 0; i < out.size(); ++i)
        {
            BOOST_CHECK_MESSAGE(Compare<ArmnnOType>(it.second[i], out[i], tolerance),
                                "Actual output: " << out[i] << ". Expected output: " << it.second[i]);
        }
    }
}

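// A minimal sketch of how a layer test might drive EndToEndLayerTestImpl
// (illustrative only; the pass-through network below is an assumption, not a
// test that exists in this file):
//
//   INetworkPtr net(INetwork::Create());
//   IConnectableLayer* in  = net->AddInputLayer(0);
//   IConnectableLayer* out = net->AddOutputLayer(0);
//   in->GetOutputSlot(0).Connect(out->GetInputSlot(0));
//   in->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 4 }, DataType::Float32));
//
//   std::map<int, std::vector<float>> inputs   = {{ 0, { 1.f, 2.f, 3.f, 4.f } }};
//   std::map<int, std::vector<float>> expected = {{ 0, { 1.f, 2.f, 3.f, 4.f } }};
//   EndToEndLayerTestImpl<DataType::Float32, DataType::Float32>(
//       std::move(net), inputs, expected, { Compute::CpuRef });
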
inline void ImportNonAlignedInputPointerTest(std::vector<BackendId> backends)
{
    using namespace armnn;

    // Create the runtime in which the test will run
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(armnn::IRuntime::Create(options));

    // Build up the structure of the network
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input = net->AddInputLayer(0);

    ActivationDescriptor descriptor;
    descriptor.m_Function = ActivationFunction::Square;
    IConnectableLayer* activation = net->AddActivationLayer(descriptor);

    IConnectableLayer* output = net->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
    activation->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
    activation->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));

    // Optimize the network
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
    BOOST_CHECK(optNet);

    // Load it into the runtime
    NetworkId netId;
    std::string ignoredErrorMessage;
    // Enable importing
    INetworkProperties networkProperties(true, false);
    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Create structures for input & output
    std::vector<float> inputData
    {
        1.0f, 2.0f, 3.0f, 4.0f
    };

    // Misaligned input: offset the pointer by one byte so it cannot satisfy
    // the alignment required for zero-copy import
    float* misalignedInputData = reinterpret_cast<float*>(reinterpret_cast<char*>(inputData.data()) + 1);

    std::vector<float> outputData(4);

    // Aligned output
    float* alignedOutputData = outputData.data();

    InputTensors inputTensors
    {
        {0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedInputData)},
    };
    OutputTensors outputTensors
    {
        {0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputData)}
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference and expect it to fail with a MemoryImportException
    BOOST_CHECK_THROW(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryImportException);
}

inline void ExportNonAlignedOutputPointerTest(std::vector<BackendId> backends)
{
    using namespace armnn;

    // Create the runtime in which the test will run
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(armnn::IRuntime::Create(options));

    // Build up the structure of the network
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input = net->AddInputLayer(0);

    ActivationDescriptor descriptor;
    descriptor.m_Function = ActivationFunction::Square;
    IConnectableLayer* activation = net->AddActivationLayer(descriptor);

    IConnectableLayer* output = net->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
    activation->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
    activation->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));

    // Optimize the network
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
    BOOST_CHECK(optNet);

    // Load it into the runtime
    NetworkId netId;
    std::string ignoredErrorMessage;
    // Enable importing and exporting
    INetworkProperties networkProperties(true, true);
    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Create structures for input & output. Five elements are allocated even
    // though the tensors hold four, so the misaligned pointer below still has
    // four floats' worth of storage behind it.
    std::vector<float> inputData
    {
        1.0f, 2.0f, 3.0f, 4.0f, 5.0f
    };

    // Aligned input
    float* alignedInputData = inputData.data();

    std::vector<float> outputData(5);

    // Misaligned output
    float* misalignedOutputData = reinterpret_cast<float*>(reinterpret_cast<char*>(outputData.data()) + 1);

    InputTensors inputTensors
    {
        {0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), alignedInputData)},
    };
    OutputTensors outputTensors
    {
        {0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputData)}
    };

    // Do the inference and expect it to fail with a MemoryExportException
    if (backends[0] == Compute::CpuAcc)
    {
        // For CpuAcc the NeonTensorHandle will throw its own exception on misaligned memory
        BOOST_CHECK_THROW(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryImportException);
    }
    else
    {
        BOOST_CHECK_THROW(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryExportException);
    }
}

inline void ImportAlignedPointerTest(std::vector<BackendId> backends)
{
    using namespace armnn;

    // Create the runtime in which the test will run
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(armnn::IRuntime::Create(options));

    // Build up the structure of the network
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input = net->AddInputLayer(0);

    ActivationDescriptor descriptor;
    descriptor.m_Function = ActivationFunction::Square;
    IConnectableLayer* activation = net->AddActivationLayer(descriptor);

    IConnectableLayer* output = net->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
    activation->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
    activation->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));

    // Optimize the network
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
    BOOST_CHECK(optNet);

    // Load it into the runtime
    NetworkId netId;
    std::string ignoredErrorMessage;
    // Enable importing and exporting
    INetworkProperties networkProperties(true, true);
    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Create structures for input & output
    std::vector<float> inputData
    {
        1.0f, 2.0f, 3.0f, 4.0f
    };

    std::vector<float> outputData(4);

    std::vector<float> expectedOutput
    {
        1.0f, 4.0f, 9.0f, 16.0f
    };

    InputTensors inputTensors
    {
        {0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
    };
    OutputTensors outputTensors
    {
        {0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Contains ActivationWorkload
    std::size_t found = dump.find("ActivationWorkload");
    BOOST_TEST(found != std::string::npos);

    // Contains SyncMemGeneric
    found = dump.find("SyncMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Does not contain CopyMemGeneric
    found = dump.find("CopyMemGeneric");
    BOOST_TEST(found == std::string::npos);

    // Check the output is as expected
    BOOST_TEST(outputData == expectedOutput);
}

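// Reading the profiler dumps in these tests: CopyMemGeneric indicates that
// tensor data was copied between the caller's buffers and backend-internal
// memory, whereas SyncMemGeneric indicates that imported/exported memory was
// synchronised in place without a copy. The counts asserted below follow
// directly from which of importEnabled/exportEnabled are set.
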
inline void ImportOnlyWorkload(std::vector<BackendId> backends)
{
    using namespace armnn;

    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Build up the structure of the network
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input = net->AddInputLayer(0);

    ActivationDescriptor descriptor;
    descriptor.m_Function = ActivationFunction::Square;
    IConnectableLayer* activation = net->AddActivationLayer(descriptor);

    IConnectableLayer* output = net->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
    activation->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
    activation->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));

    // Optimize the network
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());

    BOOST_TEST_CHECKPOINT("Load Network");
    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;
    // Enable importing only
    INetworkProperties networkProperties(true, false);
    BOOST_TEST(runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties)
               == Status::Success);

    BOOST_TEST_CHECKPOINT("Generate Data");
    // Create structures for input & output
    std::vector<float> inputData
    {
        1.0f, 2.0f, 3.0f, 4.0f
    };

    std::vector<float> outputData(4);

    std::vector<float> expectedOutput
    {
        1.0f, 4.0f, 9.0f, 16.0f
    };

    BOOST_TEST_CHECKPOINT("Create Network");
    InputTensors inputTensors
    {
        {0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
    };
    OutputTensors outputTensors
    {
        {0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
    };

    BOOST_TEST_CHECKPOINT("Get Profiler");
    runtime->GetProfiler(netId)->EnableProfiling(true);

    BOOST_TEST_CHECKPOINT("Run Inference");
    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    BOOST_TEST_CHECKPOINT("Print Profiler");
    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Check there are no SyncMemGeneric workloads as we did not export
    BOOST_TEST_CHECKPOINT("Find SyncMemGeneric");
    int count = SubStringCounter(dump, "SyncMemGeneric");
    BOOST_TEST(count == 0);

    // There should be exactly one CopyMemGeneric, for the output, as we imported the input
    BOOST_TEST_CHECKPOINT("Find CopyMemGeneric");
    count = SubStringCounter(dump, "CopyMemGeneric");
    BOOST_TEST(count == 1);

    // Check the output is correct
    BOOST_CHECK_EQUAL_COLLECTIONS(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end());
}

inline void ExportOnlyWorkload(std::vector<BackendId> backends)
{
    using namespace armnn;

    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Build up the structure of the network
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input = net->AddInputLayer(0);

    ActivationDescriptor descriptor;
    descriptor.m_Function = ActivationFunction::Square;
    IConnectableLayer* activation = net->AddActivationLayer(descriptor);

    IConnectableLayer* output = net->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
    activation->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
    activation->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));

    // Optimize the network
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());

    BOOST_TEST_CHECKPOINT("Load Network");
    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;
    // Enable exporting only
    INetworkProperties networkProperties(false, true);
    BOOST_TEST(runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties)
               == Status::Success);

    BOOST_TEST_CHECKPOINT("Generate Data");
    // Create structures for input & output
    std::vector<float> inputData
    {
        1.0f, 2.0f, 3.0f, 4.0f
    };

    std::vector<float> outputData(4);

    std::vector<float> expectedOutput
    {
        1.0f, 4.0f, 9.0f, 16.0f
    };

    BOOST_TEST_CHECKPOINT("Create Network");
    InputTensors inputTensors
    {
        {0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
    };
    OutputTensors outputTensors
    {
        {0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
    };

    BOOST_TEST_CHECKPOINT("Get Profiler");
    runtime->GetProfiler(netId)->EnableProfiling(true);

    BOOST_TEST_CHECKPOINT("Run Inference");
    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    BOOST_TEST_CHECKPOINT("Print Profiler");
    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Check there is a SyncMemGeneric workload as we exported
    BOOST_TEST_CHECKPOINT("Find SyncMemGeneric");
    int count = SubStringCounter(dump, "SyncMemGeneric");
    BOOST_TEST(count == 1);

    // There should be exactly one CopyMemGeneric, for the input, as we did not import
    BOOST_TEST_CHECKPOINT("Find CopyMemGeneric");
    count = SubStringCounter(dump, "CopyMemGeneric");
    BOOST_TEST(count == 1);

    // Check the output is correct
    BOOST_CHECK_EQUAL_COLLECTIONS(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end());
}

inline void ImportAndExportWorkload(std::vector<BackendId> backends)
{
    using namespace armnn;

    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Build up the structure of the network
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input = net->AddInputLayer(0);

    ActivationDescriptor descriptor;
    descriptor.m_Function = ActivationFunction::Square;
    IConnectableLayer* activation = net->AddActivationLayer(descriptor);

    IConnectableLayer* output = net->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
    activation->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
    activation->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));

    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());

    BOOST_TEST_CHECKPOINT("Load Network");
    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;
    // Enable importing and exporting
    INetworkProperties networkProperties(true, true);
    BOOST_TEST(runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties)
               == Status::Success);

    BOOST_TEST_CHECKPOINT("Generate Data");
    // Create structures for input & output
    std::vector<float> inputData
    {
        1.0f, 2.0f, 3.0f, 4.0f
    };

    std::vector<float> outputData(4);

    std::vector<float> expectedOutput
    {
        1.0f, 4.0f, 9.0f, 16.0f
    };

    BOOST_TEST_CHECKPOINT("Create Network");
    InputTensors inputTensors
    {
        {0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
    };
    OutputTensors outputTensors
    {
        {0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
    };

    BOOST_TEST_CHECKPOINT("Get Profiler");
    runtime->GetProfiler(netId)->EnableProfiling(true);

    BOOST_TEST_CHECKPOINT("Run Inference");
    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    BOOST_TEST_CHECKPOINT("Print Profiler");
    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Check there is a SyncMemGeneric workload as we exported
    BOOST_TEST_CHECKPOINT("Find SyncMemGeneric");
    int count = SubStringCounter(dump, "SyncMemGeneric");
    BOOST_TEST(count == 1);

    // There should not be any CopyMemGeneric workloads
    BOOST_TEST_CHECKPOINT("Find CopyMemGeneric");
    count = SubStringCounter(dump, "CopyMemGeneric");
    BOOST_TEST(count == 0);

    // Check the output is correct
    BOOST_CHECK_EQUAL_COLLECTIONS(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end());
}

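// Taken together, the three tests above cover the INetworkProperties
// (importEnabled, exportEnabled) combinations:
//   (true,  false): input imported, output copied back -> 1 CopyMemGeneric, 0 SyncMemGeneric
//   (false, true ): input copied in, output exported   -> 1 CopyMemGeneric, 1 SyncMemGeneric
//   (true,  true ): zero-copy in both directions       -> 0 CopyMemGeneric, 1 SyncMemGeneric
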
inline void ExportOutputWithSeveralOutputSlotConnectionsTest(std::vector<BackendId> backends)
{
    using namespace armnn;

    // Create the runtime in which the test will run
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(armnn::IRuntime::Create(options));

    // Build up the structure of the network
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input = net->AddInputLayer(0);

    ActivationDescriptor descriptor;
    descriptor.m_Function = ActivationFunction::Square;
    IConnectableLayer* activation = net->AddActivationLayer(descriptor);

    IConnectableLayer* output0 = net->AddOutputLayer(0);
    IConnectableLayer* output1 = net->AddOutputLayer(1);

    input->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
    activation->GetOutputSlot(0).Connect(output0->GetInputSlot(0));
    activation->GetOutputSlot(0).Connect(output1->GetInputSlot(0));

    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 1 }, DataType::Float32));
    activation->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 1 }, DataType::Float32));

    // Optimize the network
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());

    // Load it into the runtime
    NetworkId netId;
    std::string ignoredErrorMessage;
    // Enable importing and exporting
    INetworkProperties networkProperties(true, true);
    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Create structures for input & output
    std::vector<float> inputData
    {
        1.0f, 2.0f, 3.0f, 4.0f
    };

    std::vector<float> outputData0(4);
    std::vector<float> outputData1(4);

    std::vector<float> expectedOutput
    {
        1.0f, 4.0f, 9.0f, 16.0f
    };

    InputTensors inputTensors
    {
        {0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
    };
    OutputTensors outputTensors
    {
        {0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData0.data())},
        {1, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 1), outputData1.data())}
    };

    // The result of the inference is not important, just that the output is
    // copied rather than exported: an output slot with several connections
    // cannot be exported, so CopyMemGeneric workloads are expected here.
    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    std::size_t found = std::string::npos;

    if (backends[0] == Compute::CpuRef)
    {
        found = dump.find("RefActivationWorkload");
    }
    else if (backends[0] == Compute::CpuAcc)
    {
        found = dump.find("NeonActivationWorkload");
    }
    else if (backends[0] == Compute::GpuAcc)
    {
        found = dump.find("ClActivationWorkload");
    }

    BOOST_TEST(found != std::string::npos);
    // Does not contain SyncMemGeneric
    found = dump.find("SyncMemGeneric");
    BOOST_TEST(found == std::string::npos);
    // Contains CopyMemGeneric
    found = dump.find("CopyMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Check that the outputs are correct
    BOOST_CHECK_EQUAL_COLLECTIONS(outputData0.begin(), outputData0.end(),
                                  expectedOutput.begin(), expectedOutput.end());
    BOOST_CHECK_EQUAL_COLLECTIONS(outputData1.begin(), outputData1.end(),
                                  expectedOutput.begin(), expectedOutput.end());
}

inline void StridedSliceInvalidSliceEndToEndTest(std::vector<BackendId> backends)
{
    using namespace armnn;

    // Create the runtime in which the test will run
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(armnn::IRuntime::Create(options));

    // Build up the structure of the network
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input = net->AddInputLayer(0);

    // Configure a strided slice with a stride the same size as the input but with a ShrinkAxisMask on the first
    // dim of the output to make it too small to hold the specified slice. Concretely: the [0:2, 0:3] slice of
    // the { 2, 3 } input still holds 2x3 elements, but m_ShrinkAxisMask = 1 collapses the first dimension, so
    // the declared { 3 } output cannot hold both rows.
    StridedSliceDescriptor descriptor;
    descriptor.m_Begin          = {0, 0};
    descriptor.m_End            = {2, 3};
    descriptor.m_Stride         = {1, 1};
    descriptor.m_BeginMask      = 0;
    descriptor.m_EndMask        = 0;
    descriptor.m_ShrinkAxisMask = 1;
    IConnectableLayer* stridedSlice = net->AddStridedSliceLayer(descriptor);

    IConnectableLayer* output0 = net->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(stridedSlice->GetInputSlot(0));
    stridedSlice->GetOutputSlot(0).Connect(output0->GetInputSlot(0));

    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 2, 3 }, DataType::Float32));
    stridedSlice->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 3 }, DataType::Float32));

    // Attempt to optimize the network and check that the correct exception is thrown
    BOOST_CHECK_THROW(Optimize(*net, backends, runtime->GetDeviceSpec()), armnn::LayerValidationException);
}

} // anonymous namespace