//
// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include <backendsCommon/test/CommonTestUtils.hpp>
#include <backendsCommon/test/mockBackend/MockImportBackend.hpp>

#include <test/GraphUtils.hpp>

#include <boost/test/unit_test.hpp>

BOOST_AUTO_TEST_SUITE(NeonFallback)

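// The tests in this suite build small networks that the optimizer must split
// between two backends. They then inspect the optimized graph and the profiler
// dump to verify whether the inter-backend transfer uses memory import
// (ImportMemGeneric/SyncMemGeneric) or an explicit copy (CopyMemGeneric).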
BOOST_AUTO_TEST_CASE(FallbackImportToCpuAcc)
{
    using namespace armnn;

    // Create a mock backend object
    MockImportBackendInitialiser initialiser; // Register the Mock Backend
    auto backendObjPtr = CreateBackendObject(MockImportBackendId());
    BOOST_TEST((backendObjPtr != nullptr));

    BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
    if (backendIds.find("MockRef") == backendIds.end())
    {
        std::string message = "Cannot load MockRef";
        BOOST_FAIL(message);
    }

    // Create runtime in which test will run and allow fallback to CpuAcc.
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* sub = net->AddSubtractionLayer("sub");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
    add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
    sub->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    input2->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);
    sub->GetOutputSlot(0).SetTensorInfo(info);

    // Optimize the network
    std::vector<BackendId> backends = { "MockRef", Compute::CpuAcc };
    OptimizerOptions optOptions;
    optOptions.m_ImportEnabled = true;
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);

    OptimizedNetwork* optNetObjPtr = PolymorphicDowncast<OptimizedNetwork*>(optNet.get());
    Graph& graph = optNetObjPtr->GetGraph();

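    // The optimizer inserts a compatibility layer between the two backends and
    // names it after the connection it replaces, here "[ add (0) -> sub (1) ]".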
    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
    armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    BOOST_TEST(CheckOrder(graph, layer0, layer1));
    BOOST_TEST(CheckOrder(graph, layer1, layer2));
    BOOST_TEST(CheckOrder(graph, layer2, layer3));
    BOOST_TEST(CheckOrder(graph, layer3, layer4));
    BOOST_TEST(CheckOrder(graph, layer4, layer5));
    BOOST_TEST(CheckOrder(graph, layer5, layer6));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;
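    // INetworkProperties(importEnabled, exportEnabled): both are enabled so the
    // runtime can import input buffers and export output buffers without copies.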
    INetworkProperties networkProperties(true, true);

    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
    };
    std::vector<float> inputData1
    {
        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
    };
    std::vector<float> inputData2
    {
        12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
    };

    std::vector<float> outputData(12);

    std::vector<float> expectedOutput
    {
        11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f
    };

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
        { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
        { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Contains ImportMemGeneric
    std::size_t found = dump.find("ImportMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Contains SyncMemGeneric
    found = dump.find("SyncMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Does not contain CopyMemGeneric
    found = dump.find("CopyMemGeneric");
    BOOST_TEST(found == std::string::npos);

    // Use memory import between backends
    BOOST_TEST((layer4->GetType() == LayerType::MemImport));

    // Check output is as expected
    BOOST_TEST(outputData == expectedOutput);
}

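// As above, but the add layer's output feeds a Pooling2d layer on CpuAcc. Neon
// tensors can carry padding, which rules out importing the buffer directly, so
// the optimizer is expected to insert a copy instead of an import.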
BOOST_AUTO_TEST_CASE(FallbackPaddingCopyToCpuAcc)
{
    using namespace armnn;

    // Create a mock backend object
    MockImportBackendInitialiser initialiser; // Register the Mock Backend
    auto backendObjPtr = CreateBackendObject(MockImportBackendId());
    BOOST_TEST((backendObjPtr != nullptr));

    BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
    if (backendIds.find("MockRef") == backendIds.end())
    {
        std::string message = "Cannot load MockRef";
        BOOST_FAIL(message);
    }

    // Create runtime in which test will run and allow fallback to CpuAcc.
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    Pooling2dDescriptor desc;

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    add->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
    pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
    TensorInfo poolingInfo = TensorInfo({ 1, 2, 1, 1 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);
    pooling->GetOutputSlot(0).SetTensorInfo(poolingInfo);

    // Optimize the network
    std::vector<BackendId> backends = { "MockRef", Compute::CpuAcc };
    OptimizerOptions optOptions;
    optOptions.m_ImportEnabled = true;
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);

    OptimizedNetwork* optNetObjPtr = PolymorphicDowncast<OptimizedNetwork*>(optNet.get());
    Graph& graph = optNetObjPtr->GetGraph();

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "[ add (0) -> pooling (0) ]");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "pooling");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    BOOST_TEST(CheckOrder(graph, layer0, layer1));
    BOOST_TEST(CheckOrder(graph, layer1, layer2));
    BOOST_TEST(CheckOrder(graph, layer2, layer3));
    BOOST_TEST(CheckOrder(graph, layer3, layer4));
    BOOST_TEST(CheckOrder(graph, layer4, layer5));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;
    INetworkProperties networkProperties(true, true);

    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
    };
    std::vector<float> inputData1
    {
        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
    };

    std::vector<float> outputData(2);

    std::vector<float> expectedOutput
    {
        6.0f, 12.0f
    };

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
        { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Contains CopyMemGeneric between the backends
    std::size_t found = dump.find("CopyMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Contains SyncMemGeneric for the output
    found = dump.find("SyncMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Does not contain ImportMemGeneric
    found = dump.find("ImportMemGeneric");
    BOOST_TEST(found == std::string::npos);

    // Use memory copy between backends
    BOOST_TEST((layer3->GetType() == LayerType::MemCopy));

    // Check output is as expected
    BOOST_TEST(outputData == expectedOutput);
}

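// Mirror image of FallbackImportToCpuAcc: the compatibility layer now sits on
// the sub -> add connection, and it must still be a MemImport layer.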
BOOST_AUTO_TEST_CASE(FallbackImportFromCpuAcc)
{
    using namespace armnn;

    // Create a mock backend object
    MockImportBackendInitialiser initialiser; // Register the Mock Backend
    auto backendObjPtr = CreateBackendObject(MockImportBackendId());
    BOOST_TEST((backendObjPtr != nullptr));

    BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
    if (backendIds.find("MockRef") == backendIds.end())
    {
        std::string message = "Cannot load MockRef";
        BOOST_FAIL(message);
    }

    // Create runtime in which test will run and allow fallback to CpuAcc.
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
    IConnectableLayer* sub = net->AddSubtractionLayer("sub");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
    input2->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    sub->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    add->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    input2->GetOutputSlot(0).SetTensorInfo(info);
    sub->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);

    // Optimize the network
    std::vector<BackendId> backends = { "MockRef", Compute::CpuAcc };
    OptimizerOptions optOptions;
    optOptions.m_ImportEnabled = true;
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);

    OptimizedNetwork* optNetObjPtr = PolymorphicDowncast<OptimizedNetwork*>(optNet.get());
    Graph& graph = optNetObjPtr->GetGraph();

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "sub");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ sub (0) -> add (1) ]");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    BOOST_TEST(CheckOrder(graph, layer0, layer1));
    BOOST_TEST(CheckOrder(graph, layer1, layer2));
    BOOST_TEST(CheckOrder(graph, layer2, layer3));
    BOOST_TEST(CheckOrder(graph, layer3, layer4));
    BOOST_TEST(CheckOrder(graph, layer4, layer5));
    BOOST_TEST(CheckOrder(graph, layer5, layer6));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;
    INetworkProperties networkProperties(true, true);

    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f
    };
    std::vector<float> inputData1
    {
        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
    };
    std::vector<float> inputData2
    {
        12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
    };

    std::vector<float> outputData(12);

    std::vector<float> expectedOutput
    {
        13.0f, 11.0f, 11.0f, 9.0f, 7.0f, 7.0f, 7.0f, 5.0f, 5.0f, 3.0f, 3.0f, -5.0f
    };

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
        { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
        { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Contains ImportMemGeneric
    std::size_t found = dump.find("ImportMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Contains SyncMemGeneric
    found = dump.find("SyncMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Does not contain CopyMemGeneric
    found = dump.find("CopyMemGeneric");
    BOOST_TEST(found == std::string::npos);

    // Use memory import between backends
    BOOST_TEST((layer4->GetType() == LayerType::MemImport));

    // Check output is as expected
    BOOST_TEST(outputData == expectedOutput);
}

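// As in FallbackPaddingCopyToCpuAcc, the pooling layer's potentially padded
// Neon tensors prevent importing, so the pooling -> add connection gets a
// MemCopy layer instead.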
BOOST_AUTO_TEST_CASE(FallbackPaddingCopyFromCpuAcc)
{
    using namespace armnn;

    // Create a mock backend object
    MockImportBackendInitialiser initialiser; // Register the Mock Backend
    auto backendObjPtr = CreateBackendObject(MockImportBackendId());
    BOOST_TEST((backendObjPtr != nullptr));

    BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
    if (backendIds.find("MockRef") == backendIds.end())
    {
        std::string message = "Cannot load MockRef";
        BOOST_FAIL(message);
    }

    // Create runtime in which test will run and allow fallback to CpuAcc.
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    Pooling2dDescriptor desc;

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    pooling->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    add->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo inputInfo = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
    TensorInfo poolingInfo = TensorInfo({ 1, 2, 1, 1 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(inputInfo);
    input1->GetOutputSlot(0).SetTensorInfo(poolingInfo);
    pooling->GetOutputSlot(0).SetTensorInfo(poolingInfo);
    add->GetOutputSlot(0).SetTensorInfo(poolingInfo);

    // Optimize the network
    std::vector<BackendId> backends = { "MockRef", Compute::CpuAcc };
    OptimizerOptions optOptions;
    optOptions.m_ImportEnabled = true;
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);

    OptimizedNetwork* optNetObjPtr = PolymorphicDowncast<OptimizedNetwork*>(optNet.get());
    Graph& graph = optNetObjPtr->GetGraph();

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "pooling");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "[ pooling (0) -> add (0) ]");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    BOOST_TEST(CheckOrder(graph, layer0, layer1));
    BOOST_TEST(CheckOrder(graph, layer1, layer2));
    BOOST_TEST(CheckOrder(graph, layer2, layer3));
    BOOST_TEST(CheckOrder(graph, layer3, layer4));
    BOOST_TEST(CheckOrder(graph, layer4, layer5));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;
    INetworkProperties networkProperties(true, true);

    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f
    };
    std::vector<float> inputData1
    {
        -1.0f, 3.0f
    };

    std::vector<float> outputData(2);

    std::vector<float> expectedOutput
    {
        5.0f, 15.0f
    };

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
        { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Contains CopyMemGeneric between the backends
    std::size_t found = dump.find("CopyMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Contains SyncMemGeneric for the output
    found = dump.find("SyncMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Does not contain ImportMemGeneric
    found = dump.find("ImportMemGeneric");
    BOOST_TEST(found == std::string::npos);

    // Use memory copy between backends
    BOOST_TEST((layer3->GetType() == LayerType::MemCopy));

    // Check output is as expected
    BOOST_TEST(outputData == expectedOutput);
}

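// Same network as FallbackImportFromCpuAcc, but optimized without
// m_ImportEnabled and loaded with import and export disabled, so the transfer
// between the backends must fall back to a copy.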
BOOST_AUTO_TEST_CASE(FallbackDisableImportFromCpuAcc)
{
    using namespace armnn;

    // Create a mock backend object
    MockImportBackendInitialiser initialiser; // Register the Mock Backend
    auto backendObjPtr = CreateBackendObject(MockImportBackendId());
    BOOST_TEST((backendObjPtr != nullptr));

    BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
    if (backendIds.find("MockRef") == backendIds.end())
    {
        std::string message = "Cannot load MockRef";
        BOOST_FAIL(message);
    }

    // Create runtime in which test will run and allow fallback to CpuAcc.
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
    IConnectableLayer* sub = net->AddSubtractionLayer("sub");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
    input2->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    sub->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    add->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    input2->GetOutputSlot(0).SetTensorInfo(info);
    sub->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);

    // Optimize the network without enabling import
    std::vector<BackendId> backends = { "MockRef", Compute::CpuAcc };
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());

    OptimizedNetwork* optNetObjPtr = PolymorphicDowncast<OptimizedNetwork*>(optNet.get());
    Graph& graph = optNetObjPtr->GetGraph();

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "sub");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ sub (0) -> add (1) ]");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    BOOST_TEST(CheckOrder(graph, layer0, layer1));
    BOOST_TEST(CheckOrder(graph, layer1, layer2));
    BOOST_TEST(CheckOrder(graph, layer2, layer3));
    BOOST_TEST(CheckOrder(graph, layer3, layer4));
    BOOST_TEST(CheckOrder(graph, layer4, layer5));
    BOOST_TEST(CheckOrder(graph, layer5, layer6));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;
    INetworkProperties networkProperties(false, false);

    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f
    };
    std::vector<float> inputData1
    {
        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
    };
    std::vector<float> inputData2
    {
        12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
    };

    std::vector<float> outputData(12);

    std::vector<float> expectedOutput
    {
        13.0f, 11.0f, 11.0f, 9.0f, 7.0f, 7.0f, 7.0f, 5.0f, 5.0f, 3.0f, 3.0f, -5.0f
    };

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
        { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
        { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Contains CopyMemGeneric between the backends
    std::size_t found = dump.find("CopyMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Does not contain ImportMemGeneric
    found = dump.find("ImportMemGeneric");
    BOOST_TEST(found == std::string::npos);

    // Use memory copy between backends
    BOOST_TEST((layer4->GetType() == LayerType::MemCopy));

    // Check output is as expected
    BOOST_TEST(outputData == expectedOutput);
}

#if defined(ARMCOMPUTECL_ENABLED)
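// The remaining tests require a CL-capable device: the network is split
// between CpuAcc and GpuAcc, and transfers between Neon and CL are expected
// to be copies rather than imports.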
BOOST_AUTO_TEST_CASE(NeonImportEnabledFallbackToCl)
{
    using namespace armnn;

    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* sub = net->AddSubtractionLayer("sub");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
    add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
    sub->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    input2->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);
    sub->GetOutputSlot(0).SetTensorInfo(info);

    std::vector<BackendId> backends = { Compute::CpuAcc, Compute::GpuAcc };
    // Use BackendSelectionHint to specify GpuAcc for the Subtraction layer
    sub->BackendSelectionHint(backends[1]);

    // Optimize the network
    OptimizerOptions optOptions;
    optOptions.m_ImportEnabled = true;
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);

    OptimizedNetwork* optNetObjPtr = PolymorphicDowncast<OptimizedNetwork*>(optNet.get());
    Graph& graph = optNetObjPtr->GetGraph();

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
    armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    BOOST_TEST(CheckOrder(graph, layer0, layer1));
    BOOST_TEST(CheckOrder(graph, layer1, layer2));
    BOOST_TEST(CheckOrder(graph, layer2, layer3));
    BOOST_TEST(CheckOrder(graph, layer3, layer4));
    BOOST_TEST(CheckOrder(graph, layer4, layer5));
    BOOST_TEST(CheckOrder(graph, layer5, layer6));

    // Use memory copy between backends
    BOOST_TEST((layer4->GetType() == LayerType::MemCopy));

    // Correctly use backend hint
    BOOST_TEST((layer5->GetBackendId() == Compute::GpuAcc));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;
    INetworkProperties networkProperties(true, true);

    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
    };
    std::vector<float> inputData1
    {
        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
    };
    std::vector<float> inputData2
    {
        12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
    };

    std::vector<float> outputData(12);

    std::vector<float> expectedOutput
    {
        11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f
    };

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
        { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
        { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Executed Subtraction using GpuAcc
    std::size_t found = dump.find("ClSubtractionWorkload_Execute");
    BOOST_TEST(found != std::string::npos);

    // Contains CopyMemGeneric
    found = dump.find("CopyMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Check output is as expected
    BOOST_TEST(outputData == expectedOutput);
}

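// Same CpuAcc/GpuAcc split with import disabled; LoadNetwork is called
// without INetworkProperties, so the defaults (no import/export) apply.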
BOOST_AUTO_TEST_CASE(NeonImportDisabledFallbackToCl)
{
    using namespace armnn;

    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* sub = net->AddSubtractionLayer("sub");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
    add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
    sub->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    input2->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);
    sub->GetOutputSlot(0).SetTensorInfo(info);

    std::vector<BackendId> backends = { Compute::CpuAcc, Compute::GpuAcc };
    // Use BackendSelectionHint to specify GpuAcc for the Subtraction layer
    sub->BackendSelectionHint(backends[1]);

    // Optimize the network
    OptimizerOptions optOptions;
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);

    OptimizedNetwork* optNetObjPtr = PolymorphicDowncast<OptimizedNetwork*>(optNet.get());
    Graph& graph = optNetObjPtr->GetGraph();

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
    armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    BOOST_TEST(CheckOrder(graph, layer0, layer1));
    BOOST_TEST(CheckOrder(graph, layer1, layer2));
    BOOST_TEST(CheckOrder(graph, layer2, layer3));
    BOOST_TEST(CheckOrder(graph, layer3, layer4));
    BOOST_TEST(CheckOrder(graph, layer4, layer5));
    BOOST_TEST(CheckOrder(graph, layer5, layer6));

    // Use memory copy between backends
    BOOST_TEST((layer4->GetType() == LayerType::MemCopy));

    // Correctly use backend hint
    BOOST_TEST((layer5->GetBackendId() == Compute::GpuAcc));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    runtime->LoadNetwork(netId, std::move(optNet));

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
    };
    std::vector<float> inputData1
    {
        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
    };
    std::vector<float> inputData2
    {
        12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
    };

    std::vector<float> outputData(12);

    std::vector<float> expectedOutput
    {
        11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f
    };

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
        { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
        { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Executed Subtraction using GpuAcc
    std::size_t found = dump.find("ClSubtractionWorkload_Execute");
    BOOST_TEST(found != std::string::npos);

    // Contains CopyMemGeneric
    found = dump.find("CopyMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Check output is as expected
    BOOST_TEST(outputData == expectedOutput);
}

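// Extends the fallback to a whole subgraph: sub runs on GpuAcc between two
// CpuAcc layers (add and pooling), so a MemCopy layer is expected on each
// side of the fallback subgraph.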
BOOST_AUTO_TEST_CASE(NeonImportEnabledFallbackSubgraphToCl)
{
    using namespace armnn;

    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    Pooling2dDescriptor desc;

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* sub = net->AddSubtractionLayer("sub");
    IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
    add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
    sub->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
    pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
    TensorInfo poolingInfo = TensorInfo({ 1, 2, 1, 1 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    input2->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);
    sub->GetOutputSlot(0).SetTensorInfo(info);
    pooling->GetOutputSlot(0).SetTensorInfo(poolingInfo);

    std::vector<BackendId> backends = { Compute::CpuAcc, Compute::GpuAcc };
    // Use BackendSelectionHint to specify GpuAcc for the Subtraction layer
    sub->BackendSelectionHint(backends[1]);

    // Optimize the network
    OptimizerOptions optOptions;
    optOptions.m_ImportEnabled = true;
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);

    OptimizedNetwork* optNetObjPtr = PolymorphicDowncast<OptimizedNetwork*>(optNet.get());
    Graph& graph = optNetObjPtr->GetGraph();

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
    armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "[ sub (0) -> pooling (0) ]");
    armnn::Layer* const layer7 = GetFirstLayerWithName(graph, "pooling");
    armnn::Layer* const layer8 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    BOOST_TEST(CheckOrder(graph, layer0, layer1));
    BOOST_TEST(CheckOrder(graph, layer1, layer2));
    BOOST_TEST(CheckOrder(graph, layer2, layer3));
    BOOST_TEST(CheckOrder(graph, layer3, layer4));
    BOOST_TEST(CheckOrder(graph, layer4, layer5));
    BOOST_TEST(CheckOrder(graph, layer5, layer6));
    BOOST_TEST(CheckOrder(graph, layer6, layer7));
    BOOST_TEST(CheckOrder(graph, layer7, layer8));

    // Use memory copy between backends
    BOOST_TEST((layer4->GetType() == LayerType::MemCopy));
    BOOST_TEST((layer6->GetType() == LayerType::MemCopy));

    // Correctly use backend hint
    BOOST_TEST((layer5->GetBackendId() == Compute::GpuAcc));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;
    INetworkProperties networkProperties(true, true);

    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
    };
    std::vector<float> inputData1
    {
        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
    };
    std::vector<float> inputData2
    {
        12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
    };

    std::vector<float> outputData(2);

    std::vector<float> expectedOutput{ 11.0f, -1.0f };

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
        { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
        { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Executed Subtraction using GpuAcc
    std::size_t found = dump.find("ClSubtractionWorkload_Execute");
    BOOST_TEST(found != std::string::npos);

    // Correctly switched back to CpuAcc
    found = dump.find("NeonPooling2dWorkload_Execute");
    BOOST_TEST(found != std::string::npos);

    // Contains CopyMemGeneric
    found = dump.find("CopyMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Contains SyncMemGeneric for the output
    found = dump.find("SyncMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Check output is as expected
    BOOST_TEST(outputData == expectedOutput);
}

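// Same subgraph fallback with import disabled throughout; the expected
// workloads and copies are the same, minus the SyncMemGeneric on the output.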
BOOST_AUTO_TEST_CASE(NeonImportDisableFallbackSubgraphToCl)
{
    using namespace armnn;

    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    Pooling2dDescriptor desc;

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* sub = net->AddSubtractionLayer("sub");
    IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
    add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
    sub->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
    pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
    TensorInfo poolingInfo = TensorInfo({ 1, 2, 1, 1 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    input2->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);
    sub->GetOutputSlot(0).SetTensorInfo(info);
    pooling->GetOutputSlot(0).SetTensorInfo(poolingInfo);

    std::vector<BackendId> backends = { Compute::CpuAcc, Compute::GpuAcc };
    // Use BackendSelectionHint to specify GpuAcc for the Subtraction layer
    sub->BackendSelectionHint(backends[1]);

    // Optimize the network
    OptimizerOptions optOptions;
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);

    OptimizedNetwork* optNetObjPtr = PolymorphicDowncast<OptimizedNetwork*>(optNet.get());
    Graph& graph = optNetObjPtr->GetGraph();

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
    armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "[ sub (0) -> pooling (0) ]");
    armnn::Layer* const layer7 = GetFirstLayerWithName(graph, "pooling");
    armnn::Layer* const layer8 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    BOOST_TEST(CheckOrder(graph, layer0, layer1));
    BOOST_TEST(CheckOrder(graph, layer1, layer2));
    BOOST_TEST(CheckOrder(graph, layer2, layer3));
    BOOST_TEST(CheckOrder(graph, layer3, layer4));
    BOOST_TEST(CheckOrder(graph, layer4, layer5));
    BOOST_TEST(CheckOrder(graph, layer5, layer6));
    BOOST_TEST(CheckOrder(graph, layer6, layer7));
    BOOST_TEST(CheckOrder(graph, layer7, layer8));

    // Use memory copy between backends
    BOOST_TEST((layer4->GetType() == LayerType::MemCopy));
    BOOST_TEST((layer6->GetType() == LayerType::MemCopy));

    // Correctly use backend hint
    BOOST_TEST((layer5->GetBackendId() == Compute::GpuAcc));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    runtime->LoadNetwork(netId, std::move(optNet));

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
    };
    std::vector<float> inputData1
    {
        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
    };
    std::vector<float> inputData2
    {
        12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
    };

    std::vector<float> outputData(2);

    std::vector<float> expectedOutput{ 11.0f, -1.0f };

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
        { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
        { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Executed Subtraction using GpuAcc
    std::size_t found = dump.find("ClSubtractionWorkload_Execute");
    BOOST_TEST(found != std::string::npos);

    // Correctly switched back to CpuAcc
    found = dump.find("NeonPooling2dWorkload_Execute");
    BOOST_TEST(found != std::string::npos);

    // Contains CopyMemGeneric
    found = dump.find("CopyMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Check output is as expected
    BOOST_TEST(outputData == expectedOutput);
}
#endif

BOOST_AUTO_TEST_SUITE_END()