//
// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "LoadedNetwork.hpp"
#include "Layer.hpp"
#include "Graph.hpp"
#include "Network.hpp"
#include <Processes.hpp>
#include "Profiling.hpp"
#include "HeapProfiling.hpp"

#include <armnn/BackendRegistry.hpp>
#include <armnn/Logging.hpp>
#include <armnn/utility/Assert.hpp>

#include <backendsCommon/CpuTensorHandle.hpp>
#include <armnn/backends/IMemoryManager.hpp>
#include <backendsCommon/MemCopyWorkload.hpp>
#include <backendsCommon/MemSyncWorkload.hpp>

#include <LabelsAndEventClasses.hpp>

#include <fmt/format.h>

namespace armnn
{

using namespace std;
using namespace armnn::profiling;

namespace
{

template <typename ExceptionType>
std::string ToErrorMessage(const char* prefix, const ExceptionType& error)
{
    std::stringstream ss;
    ss << prefix << " " << error.what();
    return ss.str();
}

void AddLayerStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
                       const Layer& layer,
                       ProfilingGuid networkGuid)
{
    // Add layer to the post-optimisation network structure
    std::string layerName = layer.GetNameStr().empty() ? "<Unnamed>" : layer.GetNameStr();
    timelineUtils->CreateNamedTypedChildEntity(layer.GetGuid(),
                                               networkGuid,
                                               layerName,
                                               LabelsAndEventClasses::LAYER_GUID);
    for (auto&& input : layer.GetInputSlots())
    {
        const IOutputSlot* source = input.GetConnectedOutputSlot();
        ARMNN_ASSERT(source != nullptr);
        timelineUtils->CreateConnectionRelationship(ProfilingRelationshipType::RetentionLink,
                                                    source->GetOwningLayerGuid(),
                                                    layer.GetGuid());
    }
}

void AddWorkloadStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
                          std::unique_ptr<IWorkload>& workload,
                          const Layer& layer)
{
    // Add workload to the post-optimisation network structure
    timelineUtils->CreateTypedEntity(workload->GetGuid(), LabelsAndEventClasses::WORKLOAD_GUID);
    timelineUtils->MarkEntityWithLabel(workload->GetGuid(),
                                       layer.GetBackendId().Get(),
                                       LabelsAndEventClasses::BACKENDID_GUID);

    // Link the workload to the layer
    timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
                                      layer.GetGuid(),
                                      workload->GetGuid(),
                                      LabelsAndEventClasses::CHILD_GUID);
}

} // anonymous namespace

std::unique_ptr<LoadedNetwork> LoadedNetwork::MakeLoadedNetwork(std::unique_ptr<OptimizedNetwork> net,
                                                                std::string& errorMessage,
                                                                const INetworkProperties& networkProperties,
                                                                profiling::ProfilingService& profilingService)
{
    std::unique_ptr<LoadedNetwork> loadedNetwork;

    auto Fail = [&](const std::exception& error) -> std::unique_ptr<LoadedNetwork>
    {
        errorMessage = ToErrorMessage("An error occurred when preparing the network workloads:", error);
        ARMNN_LOG(error) << errorMessage;

        return std::unique_ptr<LoadedNetwork>();
    };

    try
    {
        loadedNetwork.reset(new LoadedNetwork(std::move(net), networkProperties, profilingService));
    }
    catch (const armnn::RuntimeException& error)
    {
        return Fail(error);
    }
    catch (const armnn::Exception& error)
    {
        return Fail(error);
    }
    catch (const std::runtime_error& error)
    {
        return Fail(error);
    }

    return loadedNetwork;
}
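
// A minimal sketch of how a caller typically reaches this factory, assuming an
// OptimizedNetwork produced by armnn::Optimize(); the surrounding runtime wiring
// shown here is illustrative, not the definitive Runtime implementation:
//
//     std::string errorMessage;
//     INetworkProperties properties(/*importEnabled=*/false, /*exportEnabled=*/false);
//     std::unique_ptr<LoadedNetwork> loaded =
//         LoadedNetwork::MakeLoadedNetwork(std::move(optNet), errorMessage, properties, profilingService);
//     if (!loaded)
//     {
//         ARMNN_LOG(error) << errorMessage; // construction failed; errorMessage holds the reason
//     }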

LoadedNetwork::LoadedNetwork(std::unique_ptr<OptimizedNetwork> net,
                             const INetworkProperties& networkProperties,
                             profiling::ProfilingService& profilingService) :
                             m_OptimizedNetwork(std::move(net)),
                             m_IsImportEnabled(networkProperties.m_ImportEnabled),
                             m_IsExportEnabled(networkProperties.m_ExportEnabled),
                             m_TensorHandleFactoryRegistry(),
                             m_ProfilingService(profilingService)
{
    // Create a profiler and register it for the current thread.
    m_Profiler = std::make_shared<Profiler>();
    ProfilerManager::GetInstance().RegisterProfiler(m_Profiler.get());

    Graph& order = m_OptimizedNetwork->GetGraph().TopologicalSort();

    // First create tensor handles, backends and workload factories.
    // Tensor handles are created before workloads because workload creation
    // can modify some of the handles (for example for splitter and concat layers).
    for (auto&& layer : order)
    {
        auto const& backendId = layer->GetBackendId();
        if (m_Backends.count(backendId) == 0)
        {
            auto createBackend = BackendRegistryInstance().GetFactory(backendId);
            auto it = m_Backends.emplace(std::make_pair(backendId, createBackend()));

            IBackendInternal* backend = it.first->second.get();

            if (backend->SupportsTensorAllocatorAPI())
            {
                auto workloadFactory = backend->CreateWorkloadFactory(
                    m_TensorHandleFactoryRegistry, m_OptimizedNetwork->GetModelOptions());
                m_WorkloadFactories.emplace(
                    std::make_pair(backendId, std::make_pair(std::move(workloadFactory), nullptr)));
            }
            else
            {
                IBackendInternal::IMemoryManagerSharedPtr memoryManager = backend->CreateMemoryManager();
                auto workloadFactory = backend->CreateWorkloadFactory(
                    memoryManager, m_OptimizedNetwork->GetModelOptions());

                m_WorkloadFactories.emplace(
                    std::make_pair(backendId, std::make_pair(std::move(workloadFactory), memoryManager)));
            }
        }
    }

    for (auto&& layer : order)
    {
        auto& workloadFactory = GetWorkloadFactory(*layer);

        switch (layer->GetType())
        {
            case LayerType::Input:
            case LayerType::MemImport:
            {
                // If IsImportEnabled is true, set IsMemoryManaged to false when creating TensorHandles.
                layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory, !m_IsImportEnabled);
                break;
            }
            default:
            {
                // Look for a layer with one OutputSlot that has a single connection, where that
                // connection leads to an Output layer. If export is enabled, disable memory
                // management so the tensor can be exported; otherwise a copy is performed.
                if ((layer->GetNumOutputSlots() == 1) &&
                    (layer->GetOutputSlots()[0].GetNumConnections() == 1) &&
                    (layer->GetOutputSlots()[0].GetConnection(0)->GetOwningLayer().GetType() == LayerType::Output))
                {
                    layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory, !m_IsExportEnabled);
                }
                else
                {
                    layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory);
                }
            }
        }
    }

    ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
    std::unique_ptr<TimelineUtilityMethods> timelineUtils =
        TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
    if (timelineUtils)
    {
        timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);
        // Mark the network with a start-of-life event...
        timelineUtils->RecordEvent(networkGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
        // ...and with the process ID.
        int processID = armnnUtils::Processes::GetCurrentId();
        std::stringstream ss;
        ss << processID;
        timelineUtils->MarkEntityWithLabel(networkGuid, ss.str(), LabelsAndEventClasses::PROCESS_ID_GUID);
    }

    // Then create the workloads.
    for (auto&& layer : order)
    {
        if (timelineUtils)
        {
            // Add layer to the post-optimisation network structure
            AddLayerStructure(timelineUtils, *layer, networkGuid);
        }

        const IWorkloadFactory& workloadFactory = GetWorkloadFactory(*layer);

        switch (layer->GetType())
        {
            case LayerType::Input:
            case LayerType::Output:
            {
                // Inputs and outputs are treated in a special way - see EnqueueInput() and EnqueueOutput().
                break;
            }
            default:
            {
                auto workload = layer->CreateWorkload(workloadFactory);

                if (!workload)
                {
                    const char* const layerName =
                        layer->GetNameStr().length() != 0 ? layer->GetName() : "<Unnamed>";
                    throw InvalidArgumentException(
                        fmt::format("No workload created for layer (name: '{0}' type: '{1}') (compute '{2}')",
                                    layerName, static_cast<int>(layer->GetType()), layer->GetBackendId().Get()
                        ));
                }

                if (timelineUtils)
                {
                    // Add workload to the post-optimisation network structure
                    AddWorkloadStructure(timelineUtils, workload, *layer);
                }

                m_WorkloadQueue.push_back(std::move(workload));
                // Release the constant data in the layer.
                layer->ReleaseConstantData();
                break;
            }
        }
    }

    for (auto&& workloadFactory : m_WorkloadFactories)
    {
        workloadFactory.second.first->AfterWorkloadsCreated();
    }

    if (timelineUtils)
    {
        // Commit to send the post-optimisation network structure
        timelineUtils->Commit();
    }

    // Set up memory.
    m_OptimizedNetwork->GetGraph().AllocateDynamicBuffers();

    // Now that the intermediate tensor memory has been set up, do any post-allocation
    // configuration for each workload.
    for (auto& workload : m_WorkloadQueue)
    {
        workload->PostAllocationConfigure();
    }
}

void LoadedNetwork::SendNetworkStructure()
{
    Graph& order = m_OptimizedNetwork->GetGraph().TopologicalSort();
    ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();

    std::unique_ptr<TimelineUtilityMethods> timelineUtils =
        TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);

    timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);

    for (auto&& layer : order)
    {
        // Add layer to the post-optimisation network structure
        AddLayerStructure(timelineUtils, *layer, networkGuid);
        switch (layer->GetType())
        {
            case LayerType::Input:
            case LayerType::Output:
            {
                // Inputs and outputs are treated in a special way - see EnqueueInput() and EnqueueOutput().
                break;
            }
            default:
            {
                for (auto& workload : m_WorkloadQueue)
                {
                    // Add workload to the post-optimisation network structure
                    AddWorkloadStructure(timelineUtils, workload, *layer);
                }
                break;
            }
        }
    }
    // Commit to send the post-optimisation network structure
    timelineUtils->Commit();
}

profiling::ProfilingGuid LoadedNetwork::GetNetworkGuid()
{
    return m_OptimizedNetwork->GetGuid();
}

TensorInfo LoadedNetwork::GetInputTensorInfo(LayerBindingId layerId) const
{
    for (auto&& inputLayer : m_OptimizedNetwork->GetGraph().GetInputLayers())
    {
        ARMNN_ASSERT_MSG(inputLayer->GetNumOutputSlots() == 1, "Input layer should have exactly 1 output slot");
        if (inputLayer->GetBindingId() == layerId)
        {
            return inputLayer->GetOutputSlot(0).GetTensorInfo();
        }
    }

    throw InvalidArgumentException(fmt::format("No input layer is associated with id {}", layerId));
}

TensorInfo LoadedNetwork::GetOutputTensorInfo(LayerBindingId layerId) const
{
    for (auto&& outputLayer : m_OptimizedNetwork->GetGraph().GetOutputLayers())
    {
        ARMNN_ASSERT_MSG(outputLayer->GetNumInputSlots() == 1, "Output layer should have exactly 1 input slot");
        ARMNN_ASSERT_MSG(outputLayer->GetInputSlot(0).GetConnection(), "Input slot on Output layer must be connected");
        if (outputLayer->GetBindingId() == layerId)
        {
            return outputLayer->GetInputSlot(0).GetConnection()->GetTensorInfo();
        }
    }

    throw InvalidArgumentException(fmt::format("No output layer is associated with id {}", layerId));
}

const IWorkloadFactory& LoadedNetwork::GetWorkloadFactory(const Layer& layer) const
{
    const IWorkloadFactory* workloadFactory = nullptr;

    auto it = m_WorkloadFactories.find(layer.GetBackendId());
    if (it == m_WorkloadFactories.end())
    {
        throw RuntimeException(fmt::format("No workload factory for {0} to be used for layer: {1}",
                                           layer.GetBackendId().Get(),
                                           layer.GetNameStr()),
                               CHECK_LOCATION());
    }

    workloadFactory = it->second.first.get();

    ARMNN_ASSERT_MSG(workloadFactory, "No workload factory");

    std::string reasonIfUnsupported;
    ARMNN_ASSERT_MSG(IWorkloadFactory::IsLayerSupported(layer,
                                                        {},
                                                        reasonIfUnsupported,
                                                        m_OptimizedNetwork->GetModelOptions()),
                     "Factory does not support layer");
    IgnoreUnused(reasonIfUnsupported);
    return *workloadFactory;
}

namespace {

// Non-copyable class owning accelerator-specific tensor data.
class TensorPin
{
public:
    TensorPin(std::unique_ptr<ITensorHandle> handle, const TensorInfo& info, LayerBindingId id)
        : m_TensorHandle(std::move(handle))
        , m_TensorInfo(info)
        , m_Id(id)
    {
    }

    ITensorHandle* GetTensorHandle() const { return m_TensorHandle.get(); }
    const TensorInfo& GetTensorInfo() const { return m_TensorInfo; }
    LayerBindingId GetBindingId() const { return m_Id; }

private:
    std::unique_ptr<ITensorHandle> m_TensorHandle;
    TensorInfo m_TensorInfo;
    LayerBindingId m_Id;
};

static const TensorPin& GetTensorPin(LayerBindingId id,
                                     const std::vector<TensorPin>& pins,
                                     char const* bindingPointDesc)
{
    auto it = std::find_if(pins.begin(), pins.end(),
                           [id](const TensorPin& pin)
                           {
                               return pin.GetBindingId() == id;
                           });

    if (it != pins.end())
    {
        return *it;
    }
    else
    {
        throw InvalidArgumentException(fmt::format("No tensor supplied for {0} {1}", bindingPointDesc, id));
    }
}

// Stores data that needs to be kept accessible for the entire execution of a workload.
class WorkloadData
{
public:
    WorkloadData(const InputTensors& inputTensors, const OutputTensors& outputTensors)
    {
        m_InputTensorPins.reserve(inputTensors.size());
        m_OutputTensorPins.reserve(outputTensors.size());

        for (auto inputTensorPair : inputTensors)
        {
            auto inputTensor = inputTensorPair.second;

            std::unique_ptr<ITensorHandle> tensorHandle =
                std::make_unique<ConstPassthroughCpuTensorHandle>(inputTensor.GetInfo(), inputTensor.GetMemoryArea());
            LayerBindingId layerId = inputTensorPair.first;

            m_InputTensorPins.emplace_back(std::move(tensorHandle), inputTensor.GetInfo(), layerId);
        }

        for (auto outputTensorPair : outputTensors)
        {
            auto outputTensor = outputTensorPair.second;

            std::unique_ptr<ITensorHandle> tensorHandle =
                std::make_unique<PassthroughCpuTensorHandle>(outputTensor.GetInfo(), outputTensor.GetMemoryArea());
            LayerBindingId layerId = outputTensorPair.first;

            m_OutputTensorPins.emplace_back(std::move(tensorHandle), outputTensor.GetInfo(), layerId);
        }
    }

    const TensorPin& GetInputTensorPin(LayerBindingId id) const
    {
        return GetTensorPin(id, m_InputTensorPins, "input");
    }

    const TensorPin& GetOutputTensorPin(LayerBindingId id) const
    {
        return GetTensorPin(id, m_OutputTensorPins, "output");
    }

private:
    std::vector<TensorPin> m_InputTensorPins;
    std::vector<TensorPin> m_OutputTensorPins;
};

} // anonymous namespace

Status LoadedNetwork::EnqueueWorkload(const InputTensors& inputTensors,
                                      const OutputTensors& outputTensors)
{
    const Graph& graph = m_OptimizedNetwork->GetGraph();

    // Walk the graph to determine the order of execution.
    if (graph.GetNumLayers() < 2)
    {
        ARMNN_LOG(warning) << "IRuntime::EnqueueWorkload()::Less than two nodes in graph";
        return Status::Failure;
    }

    // Data that must be kept alive for the entire execution of the workload.
    WorkloadData workloadData(inputTensors, outputTensors);

    if (graph.GetNumInputs() != inputTensors.size())
    {
        throw InvalidArgumentException("Number of inputs provided does not match network.");
    }

    // For each input to the network, call EnqueueInput with the data passed by the user.
    {
        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareInputs");
        m_InputQueue.clear();
        m_InputQueue.reserve(graph.GetNumInputs());
        for (const BindableLayer* inputLayer : graph.GetInputLayers())
        {
            const TensorPin& pin = workloadData.GetInputTensorPin(inputLayer->GetBindingId());
            EnqueueInput(*inputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
        }
    }

    // For each output of the network, call EnqueueOutput with the data passed by the user.
    {
        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareOutputs");
        m_OutputQueue.clear();
        m_OutputQueue.reserve(graph.GetNumOutputs());
        for (const BindableLayer* outputLayer : graph.GetOutputLayers())
        {
            const TensorPin& pin = workloadData.GetOutputTensorPin(outputLayer->GetBindingId());
            EnqueueOutput(*outputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
        }
    }

    std::unique_ptr<TimelineUtilityMethods> timelineUtils =
        TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
    ProfilingGuid inferenceGuid = m_ProfilingService.GetNextGuid();
    if (timelineUtils)
    {
        // Add an inference timeline trace if profiling is enabled.
        ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
        timelineUtils->CreateTypedEntity(inferenceGuid, LabelsAndEventClasses::INFERENCE_GUID);
        timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
                                          networkGuid,
                                          inferenceGuid,
                                          LabelsAndEventClasses::EXECUTION_OF_GUID);
        timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
    }

    bool executionSucceeded = true;

    {
        if (m_ProfilingService.IsProfilingEnabled())
        {
            m_ProfilingService.IncrementCounterValue(armnn::profiling::INFERENCES_RUN);
        }
        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Execute");
        ARMNN_SCOPED_HEAP_PROFILING("Executing");
        executionSucceeded = Execute(timelineUtils, inferenceGuid);
    }

    if (timelineUtils)
    {
        // Mark the end of life of the inference timeline if profiling is enabled.
        timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS);
        timelineUtils->Commit();
    }
    return executionSucceeded ? Status::Success : Status::Failure;
}
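
// A minimal usage sketch, assuming a network already loaded through IRuntime (which
// forwards to this method); the binding ids, element type and buffers below are
// illustrative, not prescribed by this file:
//
//     TensorInfo inputInfo  = runtime->GetInputTensorInfo(netId, 0);
//     TensorInfo outputInfo = runtime->GetOutputTensorInfo(netId, 0);
//     std::vector<float> inputData(inputInfo.GetNumElements());
//     std::vector<float> outputData(outputInfo.GetNumElements());
//     InputTensors  inputTensors{  { 0, ConstTensor(inputInfo, inputData.data()) } };
//     OutputTensors outputTensors{ { 0, Tensor(outputInfo, outputData.data()) } };
//     Status status = runtime->EnqueueWorkload(netId, inputTensors, outputTensors);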

void LoadedNetwork::EnqueueInput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
{
    if (layer.GetType() != LayerType::Input)
    {
        throw InvalidArgumentException("EnqueueInput: given layer not an InputLayer");
    }

    if (tensorHandle == nullptr)
    {
        throw InvalidArgumentException("EnqueueInput: tensorHandle must not be NULL");
    }

    InputQueueDescriptor inputQueueDescriptor;
    WorkloadInfo info;

    inputQueueDescriptor.m_Inputs.push_back(tensorHandle);
    info.m_InputTensorInfos.push_back(tensorInfo);

    ARMNN_ASSERT_MSG(layer.GetNumOutputSlots() == 1, "Can only handle Input Layer with one output");
    const OutputHandler& handler = layer.GetOutputHandler();
    const TensorInfo& outputTensorInfo = handler.GetTensorInfo();
    ITensorHandle* outputTensorHandle = handler.GetData();
    ARMNN_ASSERT_MSG(outputTensorHandle != nullptr,
                     "Data should have been allocated.");
    inputQueueDescriptor.m_Outputs.push_back(outputTensorHandle);
    info.m_OutputTensorInfos.push_back(outputTensorInfo);

    MemorySourceFlags importFlags = outputTensorHandle->GetImportFlags();
    bool needMemCopy = true;
    if (m_IsImportEnabled) // Try to import the input tensor.
    {
        if (CheckFlag(importFlags, MemorySource::Malloc))
        {
            needMemCopy = false;
            // This assumes a CPU tensor handle.
            void* mem = tensorHandle->Map(false);
            if (outputTensorHandle->Import(mem, MemorySource::Malloc))
            {
                tensorHandle->Unmap();
                return; // No need for a workload since the import has been done.
            }
            tensorHandle->Unmap();
            throw MemoryImportException("EnqueueInput: Memory Import failed");
        }
    }
    if (needMemCopy)
    {
        // Create a memcopy workload for the input since we did not import.
        std::unique_ptr<IWorkload> inputWorkload = std::make_unique<CopyMemGenericWorkload>(inputQueueDescriptor, info);

        ARMNN_ASSERT_MSG(inputWorkload, "No input workload created");

        std::unique_ptr<TimelineUtilityMethods> timelineUtils =
            TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
        if (timelineUtils)
        {
            // Add the input workload to the post-optimisation network structure.
            AddWorkloadStructure(timelineUtils, inputWorkload, layer);
            timelineUtils->Commit();
        }

        m_InputQueue.push_back(std::move(inputWorkload));
    }
}

void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
{
    if (layer.GetType() != LayerType::Output)
    {
        throw InvalidArgumentException("EnqueueOutput: given layer not an OutputLayer");
    }

    if (tensorHandle == nullptr)
    {
        throw InvalidArgumentException("EnqueueOutput: tensorHandle must not be NULL");
    }

    OutputQueueDescriptor outputQueueDescriptor;
    WorkloadInfo info;

    outputQueueDescriptor.m_Outputs.push_back(tensorHandle);
    info.m_OutputTensorInfos.push_back(tensorInfo);

    ARMNN_ASSERT_MSG(layer.GetNumInputSlots() == 1, "Output Layer should have exactly one input.");

    // Gets the output handler from the previous node.
    const OutputHandler& outputHandler = layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOutputHandler();

    const TensorInfo& inputTensorInfo = outputHandler.GetTensorInfo();
    ITensorHandle* inputTensorHandle = outputHandler.GetData();
    ARMNN_ASSERT_MSG(inputTensorHandle != nullptr, "Data should have been allocated.");

    // Try to import the output tensor.
    // Note: We can only import the output pointer if all of the following hold true:
    // a) The imported pointer is sufficiently aligned.
    // b) The tensor has zero padding.
    // c) There is only one connection to the OutputSlot and it is to an OutputLayer.
    // d) The output pointer is allocated via malloc. (Other types will be supported in a later release.)
    // e) m_IsExportEnabled must be set to true.
    bool needMemCopy = true;
    if (m_IsExportEnabled && (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetNumConnections() == 1))
    {
        if (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOwningLayer().GetType() != LayerType::Input)
        {
            MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags();
            if (CheckFlag(importFlags, MemorySource::Malloc))
            {
                needMemCopy = false;
                void* mem = tensorHandle->Map(false);
                bool importOk = inputTensorHandle->Import(mem, MemorySource::Malloc);
                tensorHandle->Unmap();

                if (importOk)
                {
                    // Insert a synchronization workload.
                    MemSyncQueueDescriptor syncDesc;
                    syncDesc.m_Inputs.push_back(inputTensorHandle);
                    info.m_InputTensorInfos.push_back(inputTensorInfo);
                    auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc, info);
                    ARMNN_ASSERT_MSG(syncWorkload, "No sync workload created");
                    m_OutputQueue.push_back(std::move(syncWorkload));
                }
                else
                {
                    throw MemoryExportException("EnqueueOutput: Memory Export failed");
                }
            }
        }
    }
    if (needMemCopy)
    {
        // If we got here then we didn't export the memory, so add an output workload which performs a memcopy.
        outputQueueDescriptor.m_Inputs.push_back(inputTensorHandle);
        info.m_InputTensorInfos.push_back(inputTensorInfo);

        std::unique_ptr<IWorkload> outputWorkload =
            std::make_unique<CopyMemGenericWorkload>(outputQueueDescriptor, info);
        ARMNN_ASSERT_MSG(outputWorkload, "No output workload created");

        std::unique_ptr<TimelineUtilityMethods> timelineUtils =
            TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
        if (timelineUtils)
        {
            // Add the output workload to the post-optimisation network structure.
            AddWorkloadStructure(timelineUtils, outputWorkload, layer);
            timelineUtils->Commit();
        }

        m_OutputQueue.push_back(std::move(outputWorkload));
    }
}

void LoadedNetwork::AllocateWorkingMemory(std::lock_guard<std::mutex>& lock)
{
    ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Working Memory Allocation");

    // This unused parameter makes sure we can only call this function while holding a valid lock.
    IgnoreUnused(lock);

    if (m_IsWorkingMemAllocated)
    {
        return;
    }
    for (auto&& workloadFactory : m_WorkloadFactories)
    {
        IBackendInternal::IMemoryManagerSharedPtr memoryManager = workloadFactory.second.second;
        if (memoryManager)
        {
            memoryManager->Acquire();
        }
    }
    m_TensorHandleFactoryRegistry.AquireMemory();
    m_IsWorkingMemAllocated = true;
}
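
// The lock_guard parameter above is a compile-time token for the locking protocol:
// callers must already hold m_WorkingMemMutex, as Execute() does below. A minimal
// sketch of the idiom, using hypothetical names for illustration only:
//
//     std::mutex gMutex;
//     void DoLockedWork(std::lock_guard<std::mutex>&) { /* gMutex is held here */ }
//     void Caller()
//     {
//         std::lock_guard<std::mutex> guard(gMutex);
//         DoLockedWork(guard); // cannot be called without first constructing a guard
//     }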

void LoadedNetwork::FreeWorkingMemory()
{
    std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
    if (!m_IsWorkingMemAllocated)
    {
        return;
    }
    // Informs the memory managers to release memory in their respective memory groups.
    for (auto&& workloadFactory : m_WorkloadFactories)
    {
        IBackendInternal::IMemoryManagerSharedPtr memoryManager = workloadFactory.second.second;
        if (memoryManager)
        {
            memoryManager->Release();
        }
    }
    m_TensorHandleFactoryRegistry.ReleaseMemory();
    m_IsWorkingMemAllocated = false;
}

bool LoadedNetwork::Execute(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
                            profiling::ProfilingGuid inferenceGuid)
{
    bool success = true;

    auto Fail = [&](const std::exception& error)
    {
        ARMNN_LOG(error) << "An error occurred attempting to execute a workload: " << error.what();
        success = false;
    };

    try
    {
        std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
        AllocateWorkingMemory(lockGuard);

        ProfilingDynamicGuid workloadInferenceID(0);
        auto ExecuteQueue = [&timelineUtils, &workloadInferenceID, &inferenceGuid](WorkloadQueue& queue)
        {
            for (auto& workload : queue)
            {
                if (timelineUtils)
                {
                    workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(),
                                                                                                    inferenceGuid);
                }
                workload->Execute();
                if (timelineUtils)
                {
                    timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
                }
            }
        };

        ExecuteQueue(m_InputQueue);
        ExecuteQueue(m_WorkloadQueue);
        ExecuteQueue(m_OutputQueue);
    }
    catch (const RuntimeException& error)
    {
        Fail(error);
    }
    catch (const std::runtime_error& error)
    {
        Fail(error);
    }

    return success;
}

void LoadedNetwork::RegisterDebugCallback(const DebugCallbackFunction& func)
{
    for (auto&& workloadPtr : m_WorkloadQueue)
    {
        workloadPtr.get()->RegisterDebugCallback(func);
    }
}

} // namespace armnn