//
// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "Network.hpp"
#include "Graph.hpp"
#include "Layer.hpp"
#include "DeviceSpec.hpp"
#include "Optimizer.hpp"
#include "SubgraphViewSelector.hpp"
#include "BackendSettings.hpp"
#include "optimizations/All.hpp"

#include <backendsCommon/CpuTensorHandle.hpp>
#include <backendsCommon/WorkloadFactory.hpp>
#include <armnn/backends/IBackendInternal.hpp>
#include <backendsCommon/TensorHandleFactoryRegistry.hpp>

#include <armnn/Exceptions.hpp>
#include <armnn/Utils.hpp>
#include <armnn/TypesUtils.hpp>
#include <armnn/BackendRegistry.hpp>
#include <armnn/Logging.hpp>
#include <armnn/utility/Assert.hpp>
#include <armnn/utility/IgnoreUnused.hpp>
#include <armnn/utility/PolymorphicDowncast.hpp>

#include <ProfilingService.hpp>

#include <fcntl.h>
#include <algorithm>
#include <fstream>
#include <memory>
#include <vector>

namespace armnn
{

armnn::INetwork* INetwork::CreateRaw(NetworkOptions networkOptions)
{
    return new Network(networkOptions);
}

armnn::INetworkPtr INetwork::Create(NetworkOptions networkOptions)
{
    return INetworkPtr(CreateRaw(networkOptions), &INetwork::Destroy);
}

void INetwork::Destroy(INetwork* network)
{
    delete PolymorphicDowncast<Network*>(network);
}

void IOptimizedNetwork::Destroy(IOptimizedNetwork* network)
{
    delete PolymorphicDowncast<OptimizedNetwork*>(network);
}

Status OptimizedNetwork::PrintGraph()
{
    m_Graph->Print();
    return Status::Success;
}

Status OptimizedNetwork::SerializeToDot(std::ostream& stream) const
{
    return m_Graph->SerializeToDot(stream);
}

void ReportError(const std::string& errorMessage,
                 Optional<std::vector<std::string>&> errorMessages)
{
    std::stringstream fullErrorMessage;
    fullErrorMessage << "ERROR: " << errorMessage;
    ARMNN_LOG(warning) << fullErrorMessage.str();
    if (errorMessages)
    {
        errorMessages.value().push_back(fullErrorMessage.str());
    }
}

void ReportWarning(const std::string& warningMessage,
                   Optional<std::vector<std::string>&> warningMessages)
{
    std::stringstream fullWarningMessage;
    fullWarningMessage << "WARNING: " << warningMessage;
    ARMNN_LOG(warning) << fullWarningMessage.str();
    if (warningMessages)
    {
        warningMessages.value().push_back(fullWarningMessage.str());
    }
}

OptimizationResult ReturnWithError(OptimizationResult res,
                                   const Layer* layer,
                                   const BackendSettings& backendSettings,
                                   Optional<std::vector<std::string>&> errMessages)
{
    std::stringstream failureMsg;
    failureMsg << "Layer of type " << GetLayerTypeAsCString(layer->GetType())
               << " is not supported on any preferred backend " << backendSettings.m_PreferredBackends;
    ReportError(failureMsg.str(), errMessages);

    res.m_Error = true;
    return res;
}

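// Checks that every quantized (QAsymmU8) output slot of the layer has a non-zero quantization scale set,
// reporting an error for each slot where it is missing. For Softmax layers the quantization parameters are
// additionally normalised to the expected scale of 1/256 and offset of 0.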
bool CheckScaleSetOnQuantizedType(Layer* layer, Optional<std::vector<std::string>&> errMessages)
{
    bool noErrors = true;
    unsigned int numOutputs = layer->GetNumOutputSlots();
    for (unsigned int i = 0; i < numOutputs; i++) {
        OutputSlot& outputSlot = layer->GetOutputSlot(i);
        TensorInfo info = outputSlot.GetTensorInfo();
        if (DataType::QAsymmU8 == info.GetDataType()) {
            if (0.f == info.GetQuantizationScale()) {
                noErrors = false;
                std::stringstream ss;
                ss << "output " << i << " of layer " << GetLayerTypeAsCString(layer->GetType())
                   << " (" << layer->GetNameStr() << ") is of type"
                   << " Quantized 8 bit but its scale parameter has not been set";
                ReportError(ss.str(), errMessages);
            }
            // Softmax under QuantisedAsymm8 must always be scale (1.0f/256.0f) and offset 0
            if ((info.GetQuantizationScale() != (1.0f / 256.0f) ||
                 info.GetQuantizationOffset() != 0) &&
                 layer->GetType() == armnn::LayerType::Softmax)
            {
                std::stringstream ss;
                ss << "Quantization parameters for Softmax layer (Scale: " <<
                    info.GetQuantizationScale() << " and Offset: " << info.GetQuantizationOffset() <<
                    ") are incorrect and have been updated to Scale: 0.00390625 and Offset: 0";
                ARMNN_LOG(warning) << ss.str();
                info.SetQuantizationScale((1.0f / 256.0f));
                info.SetQuantizationOffset(0);
                outputSlot.SetTensorInfo(info);
            }
        }
    }
    return noErrors;
}

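// For Convolution2d and FullyConnected layers whose constant weights are stored as BFloat16, converts the
// weight tensor to Float32 in place so the layer can run on a Float32 code path.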
template <typename LayerT>
LayerT* ConvertBf16ToFp32Weight(Layer* l)
{
    LayerT* layer = PolymorphicDowncast<LayerT*>(l);
    if ((layer->GetType() == LayerType::Convolution2d || layer->GetType() == LayerType::FullyConnected)
         && layer->m_Weight)
    {
        const TensorInfo& info = layer->m_Weight->GetTensorInfo();

        if (info.GetDataType() == DataType::BFloat16)
        {
            std::vector<float> newValues(info.GetNumElements());

            armnnUtils::FloatingPointConverter::ConvertBFloat16ToFloat32(
                layer->m_Weight->template GetTensor<armnn::BFloat16>(), info.GetNumElements(), newValues.data());

            TensorInfo newInfo(info.GetShape(), DataType::Float32);
            ConstTensor newInput(newInfo, newValues);
            layer->m_Weight.reset(new ScopedCpuTensorHandle(newInput));
        }
    }
    return layer;
}

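// Tries to assign the given backend to a single layer. If the layer is not supported as-is but would be
// supported in Float32, Float16/BFloat16 conversion layers are inserted around it and assigned backends as
// well. The result is an error, a warning (caller should fall back to the next backend), or success.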
OptimizationResult AttemptBackendAssignment(BackendSettings& backendSettings,
                                            Graph& graph,
                                            Layer* layer,
                                            BackendId backend,
                                            DataType dataTypeIn,
                                            DataType dataTypeOut,
                                            const std::vector<BackendId>& availablePreferredBackends,
                                            std::string& reasonIfUnsupported,
                                            Optional<std::vector<std::string>&> errMessages)
{
    OptimizationResult result;

    // Helper lambda to compose meaningful error message before returning with error
    auto ReturnError = [&](const Layer* layer)
        {
            return ReturnWithError(result, layer, backendSettings, errMessages);
        };

    // need to set the compute device on the layer
    // before we can check if it is supported
    layer->SetBackendId(backend);
    if (!IWorkloadFactory::IsLayerSupported(*layer, EmptyOptional(), reasonIfUnsupported))
    {
        if (dataTypeIn == DataType::Float16 || dataTypeOut == DataType::Float16)
        {
            if (IWorkloadFactory::IsLayerSupported(*layer, DataType::Float32, reasonIfUnsupported)
                && layer->GetType() != LayerType::ConvertFp32ToFp16
                && layer->GetType() != LayerType::ConvertFp16ToFp32)
            {
                // Insert FP16 -> FP32 conversion layer before current layer
                std::vector<ConvertFp16ToFp32Layer*> convertFp16ToFp32Layers;
                if (dataTypeIn == DataType::Float16)
                {
                    convertFp16ToFp32Layers =
                        InsertConvertFp16ToFp32LayersBefore(graph, *layer);
                }

                // Insert FP32 -> FP16 conversion layer after current layer
                std::vector<ConvertFp32ToFp16Layer*> convertFp32ToFp16Layers;
                if (dataTypeOut == DataType::Float16)
                {
                    convertFp32ToFp16Layers =
                        InsertConvertFp32ToFp16LayersAfter(graph, *layer);
                }

                // Assign a supported backend to the newly introduced conversion layers
                auto AssignFirstSupportedBackend = [&](Layer* layer, BackendId preferredBackend)
                    {
                        bool supportedBackendFound = false;
                        std::string reasonIfUnsupported;

                        // Try preferred backend first
                        layer->SetBackendId(preferredBackend);
                        if (IWorkloadFactory::IsLayerSupported(*layer,
                                                               EmptyOptional(),
                                                               reasonIfUnsupported))
                        {
                            supportedBackendFound = true;
                        }
                        else
                        {
                            for (const auto& backend : availablePreferredBackends)
                            {
                                // Skip preferred backend (we already determined that it is not supported)
                                if (backend == preferredBackend)
                                {
                                    continue;
                                }

                                layer->SetBackendId(backend);
                                if (IWorkloadFactory::IsLayerSupported(*layer,
                                                                       EmptyOptional(),
                                                                       reasonIfUnsupported))
                                {
                                    supportedBackendFound = true;
                                    break;
                                }
                            }
                        }

                        return supportedBackendFound;
                    };

                for (ConvertFp16ToFp32Layer* convertLayer : convertFp16ToFp32Layers)
                {
                    if (!AssignFirstSupportedBackend(convertLayer, backend))
                    {
                        return ReturnError(convertLayer);
                    }
                }

                for (ConvertFp32ToFp16Layer* convertLayer : convertFp32ToFp16Layers)
                {
                    if (!AssignFirstSupportedBackend(convertLayer, backend))
                    {
                        return ReturnError(convertLayer);
                    }
                }

                return result;
            }
        }
        else if (dataTypeIn == DataType::BFloat16 || dataTypeOut == DataType::BFloat16)
        {
            if (IWorkloadFactory::IsLayerSupported(*layer, DataType::Float32, reasonIfUnsupported)
                && layer->GetType() != LayerType::ConvertFp32ToBf16
                && layer->GetType() != LayerType::ConvertBf16ToFp32)
            {
                // Insert BF16 -> FP32 conversion layer before current layer
                std::vector<ConvertBf16ToFp32Layer*> convertBf16ToFp32Layers;
                if (dataTypeIn == DataType::BFloat16)
                {
                    convertBf16ToFp32Layers =
                        InsertConvertBf16ToFp32LayersBefore(graph, *layer);
                    if (layer->GetType() == LayerType::Convolution2d)
                    {
                        ConvertBf16ToFp32Weight<Convolution2dLayer>(layer);
                    }
                    else if (layer->GetType() == LayerType::FullyConnected)
                    {
                        ConvertBf16ToFp32Weight<FullyConnectedLayer>(layer);
                    }
                }

                // Insert FP32 -> BF16 conversion layer after current layer
                std::vector<ConvertFp32ToBf16Layer*> convertFp32ToBf16Layers;
                if (dataTypeOut == DataType::BFloat16)
                {
                    convertFp32ToBf16Layers =
                        InsertConvertFp32ToBf16LayersAfter(graph, *layer);
                }

                // Assign a supported backend to the newly introduced conversion layers
                auto AssignFirstSupportedBackend = [&](Layer* layer, BackendId preferredBackend)
                    {
                        bool supportedBackendFound = false;
                        std::string reasonIfUnsupported;

                        // Try preferred backend first
                        layer->SetBackendId(preferredBackend);
                        if (IWorkloadFactory::IsLayerSupported(*layer,
                                                               EmptyOptional(),
                                                               reasonIfUnsupported))
                        {
                            supportedBackendFound = true;
                        }
                        else
                        {
                            for (const auto& backend : availablePreferredBackends)
                            {
                                // Skip preferred backend (we already determined that it is not supported)
                                if (backend == preferredBackend)
                                {
                                    continue;
                                }

                                layer->SetBackendId(backend);
                                if (IWorkloadFactory::IsLayerSupported(*layer,
                                                                       EmptyOptional(),
                                                                       reasonIfUnsupported))
                                {
                                    supportedBackendFound = true;
                                    break;
                                }
                            }
                        }

                        return supportedBackendFound;
                    };

                for (ConvertBf16ToFp32Layer* convertLayer : convertBf16ToFp32Layers)
                {
                    if (!AssignFirstSupportedBackend(convertLayer, backend))
                    {
                        return ReturnError(convertLayer);
                    }
                }

                for (ConvertFp32ToBf16Layer* convertLayer : convertFp32ToBf16Layers)
                {
                    if (!AssignFirstSupportedBackend(convertLayer, backend))
                    {
                        return ReturnError(convertLayer);
                    }
                }

                return result;
            }
        }

        std::stringstream warningMsg;
        warningMsg << "Layer of type " << GetLayerTypeAsCString(layer->GetType())
                   << " is not supported on requested backend " << layer->GetBackendId().Get()
                   << " for input data type " << GetDataTypeName(dataTypeIn)
                   << " and output data type " << GetDataTypeName(dataTypeOut)
                   << " (reason: " << reasonIfUnsupported
                   << "), falling back to the next backend.";
        ReportWarning(warningMsg.str(), errMessages);

        return OptimizationResult(true, false);
    }
    else
    {
        return result;
    }
}

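// Walks the layers in [firstLayer, lastLayer) and assigns each one the first backend (honouring any backend
// hint on the layer) on which it is supported. Layers no backend supports cause an error, except for a small
// set of utility layers (MemCopy, Constant, Permute) which fall back to CpuRef when it is not already in use.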
OptimizationResult AssignBackends(OptimizedNetwork* optNetObjPtr,
                                  BackendSettings& backendSettings,
                                  Graph::Iterator& firstLayer,
                                  Graph::Iterator& lastLayer,
                                  Optional<std::vector<std::string>&> errMessages)
{
    OptimizationResult result;

    // Helper lambda to compose meaningful error message before returning with error
    auto ReturnError = [&](const Layer* layer)
        {
            return ReturnWithError(result, layer, backendSettings, errMessages);
        };


    auto availablePreferredBackends = backendSettings.GetAvailablePreferredBackends();
    if (availablePreferredBackends.empty())
    {
        std::stringstream failureMsg;
        failureMsg << "No preferred backends are available";
        ReportError(failureMsg.str(), errMessages);

        result.m_Error = true;
        return result;
    }

    for (auto it = firstLayer; it != lastLayer; ++it)
    {
        auto layer = *it;

        DataType dataTypeIn  = layer->GetNumInputSlots() == 0 ? DataType::Float32 :
            layer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo().GetDataType();
        DataType dataTypeOut = layer->GetNumOutputSlots() == 0 ? DataType::Float32 :
            layer->GetOutputSlot(0).GetTensorInfo().GetDataType();

        std::string reasonIfUnsupported;
        bool found = false;
        if (!CheckScaleSetOnQuantizedType(layer, errMessages))
        {
            // don't bomb immediately, find all the quantized outputs
            // which haven't had a scale set and report them all back.
            result.m_Error = true;
        }

        // First try assign layer to hint backend
        if (layer->GetBackendHint().has_value() &&
            backendSettings.IsBackendSupported(layer->GetBackendHint().value()) &&
            AttemptBackendAssignment(backendSettings,
                                     optNetObjPtr->GetGraph(),
                                     layer,
                                     layer->GetBackendHint().value(),
                                     dataTypeIn,
                                     dataTypeOut,
                                     availablePreferredBackends,
                                     reasonIfUnsupported,
                                     errMessages).IsOk())
        {
            found = true;
            backendSettings.m_SelectedBackends.insert(layer->GetBackendHint().value());
        }
        else
        {
            // Try assign layer to preferred list of backends
            for (const auto& backend : availablePreferredBackends)
            {
                if (layer->GetBackendHint().has_value() &&
                    layer->GetBackendHint().value() == backend)
                {
                    continue; // Don't re-test the backend hint
                }

                OptimizationResult res = AttemptBackendAssignment(backendSettings,
                                                                  optNetObjPtr->GetGraph(),
                                                                  layer,
                                                                  backend,
                                                                  dataTypeIn,
                                                                  dataTypeOut,
                                                                  availablePreferredBackends,
                                                                  reasonIfUnsupported,
                                                                  errMessages);

                if (res.IsOk())
                {
                    found = true;
                    backendSettings.m_SelectedBackends.insert(backend);
                    break;
                }
                else if (res.IsError())
                {
                    return res;  // Cannot continue.
                                 // Note: we don't need to log the error as it would already
                                 // be logged in AttemptBackendAssignment().
                }
                else
                {
                    ARMNN_ASSERT_MSG(res.IsWarningOnly(), "OptimizationResult in unexpected state.");
                }
            }
        }

        // If the layer is unsupported by any devices, log and return a null network.
        if (!found)
        {
            // NOTE: if the layer is not an operation queue type AND we have not got CpuRef as a
            // fallback we should set the compute device on the layer to CpuRef (these are not
            // available as accelerated operations, or are only available under certain
            // conditions, currently they comprise MemCopy, Constant, Permute)
            armnn::LayerType layerType = layer->GetType();
            if (!backendSettings.IsCpuRefUsed() && (layerType == armnn::LayerType::MemCopy ||
                                                    layerType == armnn::LayerType::Constant ||
                                                    layerType == armnn::LayerType::Permute))
            {
                BackendId cpuBackendId(armnn::Compute::CpuRef);
                layer->SetBackendId(cpuBackendId);
                backendSettings.m_SelectedBackends.insert(cpuBackendId);
            }
            else
            {
                return ReturnError(layer);
            }
        }
    }

    return result;
}

OptimizationResult AssignBackends(OptimizedNetwork* optNetObjPtr,
                                  BackendSettings& backendSettings,
                                  SubgraphView& subgraph,
                                  Optional<std::vector<std::string>&> errMessages)
{
    Graph::Iterator firstLayer = subgraph.begin();
    Graph::Iterator lastLayer  = subgraph.end();
    return AssignBackends(optNetObjPtr,
                          backendSettings,
                          firstLayer,
                          lastLayer,
                          errMessages);
}

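// Instantiates a backend object for every supported backend in the settings, registers its tensor handle
// factories with the given registry and returns the backend objects keyed by their BackendId.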
BackendsMap CreateSupportedBackends(TensorHandleFactoryRegistry& handleFactoryRegistry,
                                    BackendSettings& backendSettings)
{
    BackendsMap backends;
    auto const& backendRegistry = BackendRegistryInstance();
    for (auto&& selectedBackend : backendSettings.m_SupportedBackends)
    {
        auto backendFactory = backendRegistry.GetFactory(selectedBackend);
        auto backendObjPtr = backendFactory();
        ARMNN_ASSERT(backendObjPtr);

        backendObjPtr->RegisterTensorHandleFactories(handleFactoryRegistry);

        backends[backendObjPtr->GetId()] = std::move(backendObjPtr);
    }

    return backends;
}

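// For every selected backend, selects the sub-graphs assigned to it and asks the backend to optimize them,
// substituting the optimized sub-graphs back into the main graph. Sub-graphs the backend fails to optimize
// are re-assigned to the remaining available backends.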
OptimizationResult ApplyBackendOptimizations(OptimizedNetwork* optNetObjPtr,
                                             BackendSettings& backendSettings,
                                             BackendsMap& backends,
                                             const ModelOptions& modelOptions,
                                             Optional<std::vector<std::string>&> errMessages)
{
    ARMNN_ASSERT(optNetObjPtr);

    OptimizationResult result;

    // Get the optimized graph
    Graph& optGraph = optNetObjPtr->GetGraph();

    // Run backend specific optimizations
    for (auto&& selectedBackend : backendSettings.m_SelectedBackends)
    {
        auto backendObjPtr = backends.find(selectedBackend)->second.get();
        ARMNN_ASSERT(backendObjPtr);

        // Select sub-graphs based on backend
        SubgraphViewSelector::Subgraphs subgraphs =
            SubgraphViewSelector::SelectSubgraphs(optGraph,
                                                  // Select layers assigned to the requested backend
                                                  [&backendObjPtr](const Layer& layer)
                                                  {
                                                      return layer.GetType() != LayerType::Input &&
                                                             layer.GetType() != LayerType::Output &&
                                                             layer.GetBackendId() == backendObjPtr->GetId();
                                                  });
        if (subgraphs.empty())
        {
            // No sub-graphs found, try with next selected backend
            continue;
        }

        // Try to optimize each sub-graph
        for (auto& subgraph : subgraphs)
        {
            // Try to optimize the current sub-graph
            OptimizationViews optimizationViews = backendObjPtr->OptimizeSubgraphView(*subgraph, modelOptions);
            ARMNN_ASSERT(optimizationViews.Validate(*subgraph));

            // Optimization attempted, check the resulting optimized sub-graph
            for (auto& substitution : optimizationViews.GetSubstitutions())
            {
                // Sub-graph optimized, substitute the sub-graph with the new optimized one in the main optimized graph
                SubgraphView& replacementSubgraph   = substitution.m_ReplacementSubgraph;
                SubgraphView& substitutableSubgraph = substitution.m_SubstitutableSubgraph;
                optGraph.SubstituteSubgraph(substitutableSubgraph, replacementSubgraph);

                // Assign the current backend to the optimized sub-graph
                std::for_each(replacementSubgraph.begin(), replacementSubgraph.end(), [&selectedBackend](Layer* l)
                    {
                        ARMNN_ASSERT(l);
                        l->SetBackendId(selectedBackend);
                    });
            }

            if (!optimizationViews.GetFailedSubgraphs().empty())
            {
                std::stringstream warningMsg;
                warningMsg << "Some sub-graph(s) failed to optimize on " << backendObjPtr->GetId() << " backend.";
                ReportWarning(warningMsg.str(), errMessages);

                // Failed to optimize the given sub-graph, re-assign the sub-graph layers to other available backends
                BackendSettings settingsCopy(backendSettings);
                if (!backendObjPtr->GetId().IsCpuRef())
                {
                    // Add the current backend to the list of backends to ignore
                    settingsCopy.m_IgnoredBackends.insert(backendObjPtr->GetId());
                }

                int count = 0;
                for (auto& failedSubgraph : optimizationViews.GetFailedSubgraphs())
                {
                    // An error occurred: the optimization was attempted but not performed, try different backends
                    std::stringstream subgraphMsg;
                    subgraphMsg << "Re-assigning backends to " << failedSubgraph.GetLayers().size()
                                << " layers inside sub-graph " << count++;
                    ReportWarning(subgraphMsg.str(), errMessages);

                    OptimizationResult reassignmentResult = AssignBackends(optNetObjPtr,
                                                                           settingsCopy,
                                                                           *subgraph,
                                                                           errMessages);
                    if (reassignmentResult.m_Error)
                    {
                        // Failed to re-assign one of the remaining backends to each layer of the sub-graph
                        result.m_Error = true;
                        return result;
                    }
                }
            }
        }
    }

    return result;
}

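// Returns true when moving a tensor from a handle created by the src factory to one created by the dst
// factory needs an explicit copy, i.e. the factories differ and the src cannot export memory in a form the
// dst can import.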
bool RequiresCopy(ITensorHandleFactory::FactoryId src,
                  ITensorHandleFactory::FactoryId dst,
                  TensorHandleFactoryRegistry& registry)
{
    if (src != dst)
    {
        ITensorHandleFactory* srcFactory = registry.GetFactory(src);
        ITensorHandleFactory* dstFactory = registry.GetFactory(dst);

        if (srcFactory && dstFactory &&
            (srcFactory->GetExportFlags() & dstFactory->GetImportFlags()) != 0)
        {
            return false;
        }
        return true;
    }
    return false;
}

// Find the handle factory for the input layer which results in fewest required copies.
ITensorHandleFactory::FactoryId CalculateSlotOptionForInput(BackendsMap& backends,
                                                            OutputSlot& slot,
                                                            TensorHandleFactoryRegistry& registry)
{
    Layer& layer = slot.GetOwningLayer();
    ARMNN_ASSERT(layer.GetType() == LayerType::Input);

    // Explicitly select the tensorhandle factory for InputLayer because the rules for it are slightly different. It
    // doesn't matter which backend it is assigned to because they all use the same implementation, which
    // requires Map/Unmap support. This means that, so long as the handle type supports map/unmap semantics, we can
    // select a factory with maximum compatibility with the layers connected to the InputLayer.

    // First ensure the source backend supports the tensor handle API
    auto frmBackend = backends.find(layer.GetBackendId());
    if (frmBackend == backends.end() ||
        !frmBackend->second->SupportsTensorAllocatorAPI())
    {
        return ITensorHandleFactory::LegacyFactoryId;
    }

    // Go through all connections to the output slot and determine the TensorHandleFactory which results in the
    // fewest copies.
    std::map<ITensorHandleFactory::FactoryId, int> factoryScores;
    int topScore = 0;
    ITensorHandleFactory::FactoryId topChoice = ITensorHandleFactory::LegacyFactoryId;

    for (auto&& connection : slot.GetConnections())
    {
        const Layer& connectedLayer = connection->GetOwningLayer();

        auto toBackend = backends.find(connectedLayer.GetBackendId());
        ARMNN_ASSERT_MSG(toBackend != backends.end(), "Backend id not found for the connected layer");

        if (!toBackend->second.get()->SupportsTensorAllocatorAPI())
        {
            // The destination backend does not support the tensor allocator API, move to the next one
            continue;
        }

        auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences();
        for (auto&& dst : dstPrefs)
        {
            // Input layers use the mem copy workload or import, so the selected factory must
            // support either the map/unmap API or Import API
            ITensorHandleFactory* factory = registry.GetFactory(dst);
            if (!factory->SupportsMapUnmap() &&
                !CheckFlag(factory->GetImportFlags(), MemorySource::Malloc)) // Just support cpu mem imports for now
            {
                // The current tensor handle factory does not support the map/unmap or import
                // strategy, move to the next one
                continue;
            }

            auto it = factoryScores.find(dst);
            if (it == factoryScores.end())
            {
                // Add new score to the table
                factoryScores[dst] = 0;
                if (topChoice == ITensorHandleFactory::LegacyFactoryId)
                {
                    topChoice = dst;
                }
            }
            else
            {
                // Increase the score
                factoryScores[dst]++;

                // Track the best option
                if (factoryScores[dst] > topScore)
                {
                    topScore = factoryScores[dst];
                    topChoice = dst;
                }
            }
        }
    }

    return topChoice;
}

// Find the handle factory for the output layer which results in fewest required copies.
ITensorHandleFactory::FactoryId CalculateSlotOptionForOutput(BackendsMap& backends,
                                                             OutputSlot& slot,
                                                             TensorHandleFactoryRegistry& registry)
{
    IgnoreUnused(backends, slot, registry);
    return ITensorHandleFactory::DeferredFactoryId;
}

// For all handle factories supported on the source backend, we wish to find the one which requires the fewest copies
// when considering all connections.
ITensorHandleFactory::FactoryId CalculateSlotOption(BackendsMap& backends,
                                                    OutputSlot& outputSlot,
                                                    TensorHandleFactoryRegistry& registry)
{
    // First ensure the source backend supports the tensor handle API
    Layer& layer = outputSlot.GetOwningLayer();
    auto frmBackend = backends.find(layer.GetBackendId());
    if (frmBackend == backends.end() ||
        !frmBackend->second->SupportsTensorAllocatorAPI())
    {
        return ITensorHandleFactory::LegacyFactoryId;
    }

    // Connections to Output Layers require support for map/unmap on the TensorHandle.
    bool requiresMapUnmap = false;
    for (auto&& connection : outputSlot.GetConnections())
    {
        const Layer& connectedLayer = connection->GetOwningLayer();
        if (connectedLayer.GetType() == LayerType::Output)
        {
            requiresMapUnmap = true;
        }
    }

    IBackendInternal* srcBackend = frmBackend->second.get();
    auto srcPrefs = srcBackend->GetHandleFactoryPreferences();

    // Initialize the scores
    std::map<ITensorHandleFactory::FactoryId, int> factoryScores;
    for (auto&& pref : srcPrefs)
    {
        if (requiresMapUnmap) // Only consider factories that support map/unmap if required
        {
            ITensorHandleFactory* factory = registry.GetFactory(pref);
            if (!factory->SupportsMapUnmap())
            {
                // The current tensor handle factory does not support the map/unmap strategy, move to the next one
                continue;
            }
        }

        auto it = factoryScores.find(pref);
        if (it == factoryScores.end())
        {
            // Add new score to the table
            factoryScores[pref] = 0;
        }
    }

    // Score each handle factory based on how many times it requires copies on the slot connections
    for (auto&& connection : outputSlot.GetConnections())
    {
        const Layer& connectedLayer = connection->GetOwningLayer();

        auto toBackend = backends.find(connectedLayer.GetBackendId());
        ARMNN_ASSERT_MSG(toBackend != backends.end(), "Backend id not found for the connected layer");

        auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences();
        for (auto&& src : srcPrefs)
        {
            if (factoryScores.find(src) == factoryScores.end()) // Don't consider excluded factories
            {
                continue;
            }

            for (auto&& dst : dstPrefs)
            {
                if (RequiresCopy(src, dst, registry))
                {
                    // A copy would be required for this connection, penalise the factory's score
                    factoryScores[src]++;
                    break;
                }
            }
        }
    }

    // Find the lowest score
    int minScore = std::numeric_limits<int>::max();
    for (auto it : factoryScores)
    {
        minScore = std::min(minScore, it.second);
    }

    // Collect factories matching the best (lowest) score
    std::vector<ITensorHandleFactory::FactoryId> optimalFactories;
    for (auto it : factoryScores)
    {
        if (it.second == minScore)
        {
            optimalFactories.push_back(it.first);
        }
    }

    // For all compatible Factories matching the best score, find the preferred one for the current layer.
    for (auto&& srcPref : srcPrefs)
    {
        for (auto&& comp : optimalFactories)
        {
            if (comp == srcPref)
            {
                return comp;
            }
        }
    }

    return ITensorHandleFactory::LegacyFactoryId;
}

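// Determines how a tensor should travel along one edge of the graph: used directly by the consumer
// (DirectCompatibility), exported/imported between factories (ExportToTarget), or copied between backends
// (CopyToTarget). Undefined means no compatible strategy could be found.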
EdgeStrategy CalculateEdgeStrategy(BackendsMap& backends,
                                   ITensorHandleFactory::FactoryId srcFactoryId,
                                   const Layer& layer,
                                   const Layer& connectedLayer,
                                   TensorHandleFactoryRegistry& registry,
                                   bool importEnabled)
{
    auto toBackend = backends.find(connectedLayer.GetBackendId());
    ARMNN_ASSERT_MSG(toBackend != backends.end(), "Backend id not found for the connected layer");

    auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences();

    // Legacy API check for backward compatibility
    if (srcFactoryId == ITensorHandleFactory::LegacyFactoryId || dstPrefs.empty())
    {
        if (layer.GetBackendId() != connectedLayer.GetBackendId())
        {
            return EdgeStrategy::CopyToTarget;
        }
        else
        {
            return EdgeStrategy::DirectCompatibility;
        }
    }

    // TensorHandleFactory API present, so perform more sophisticated strategies.
    // Dst Output layers don't require copy because they use import or map/unmap
    if (connectedLayer.GetType() == LayerType::Output)
    {
        return EdgeStrategy::DirectCompatibility;
    }

    // Search for direct match in prefs
    for (auto&& pref : dstPrefs)
    {
        if (pref == srcFactoryId)
        {
            return EdgeStrategy::DirectCompatibility;
        }
    }

    // Search for export/import options
    ITensorHandleFactory* srcFactory = registry.GetFactory(srcFactoryId);
    if (srcFactory->GetExportFlags() != 0 && importEnabled)
    {
        for (auto&& pref : dstPrefs)
        {
            ITensorHandleFactory* dstFactory = registry.GetFactory(pref);

            // Handles cases when a destPref is not listed in TensorHandleFactoryRegistry
            if (!dstFactory) {
                continue;
            }

            if ((dstFactory->GetImportFlags() & srcFactory->GetExportFlags()) != 0)
            {
                auto srcCapability = srcFactory->GetCapabilities(&layer, &layer, CapabilityClass::PaddingRequired);
                auto dstCapability = dstFactory->GetCapabilities(&connectedLayer,
                                                                 &connectedLayer,
                                                                 CapabilityClass::PaddingRequired);
                // Do not require memory copy if the source and destination do not require padding.
                if (srcCapability.empty() && dstCapability.empty())
                {
                    return EdgeStrategy::ExportToTarget;
                }
            }
        }
    }

    // Search for copy options via map/unmap
    if (srcFactory->SupportsMapUnmap())
    {
        for (auto&& pref : dstPrefs)
        {
            ITensorHandleFactory* dstFactory = registry.GetFactory(pref);
            if (dstFactory && dstFactory->SupportsMapUnmap())
            {
                return EdgeStrategy::CopyToTarget;
            }
        }
    }

    return EdgeStrategy::Undefined;
}

// Select the TensorHandleFactories and the corresponding memory strategy
OptimizationResult SelectTensorHandleStrategy(Graph& optGraph,
                                              BackendsMap& backends,
                                              TensorHandleFactoryRegistry& registry,
                                              bool importEnabled,
                                              Optional<std::vector<std::string>&> errMessages)
{
    OptimizationResult result;

    optGraph.ForEachLayer([&backends, &registry, &result, &errMessages, importEnabled](Layer* layer)
    {
        ARMNN_ASSERT(layer);

        // Let's make sure the backend is in our list of supported backends. Something went wrong during backend
        // assignment if this check fails
        ARMNN_ASSERT(backends.find(layer->GetBackendId()) != backends.end());

        // Check each output separately
        for (unsigned int slotIdx = 0; slotIdx < layer->GetNumOutputSlots(); slotIdx++)
        {
            OutputSlot& outputSlot = layer->GetOutputSlot(slotIdx);

            ITensorHandleFactory::FactoryId slotOption = ITensorHandleFactory::LegacyFactoryId;

            // Calculate the factory to use which results in the fewest copies being made.
            switch (layer->GetType())
            {
                case LayerType::Input:
                    slotOption = CalculateSlotOptionForInput(backends, outputSlot, registry);
                    break;
                case LayerType::Output:
                    slotOption = CalculateSlotOptionForOutput(backends, outputSlot, registry);
                    break;
                default:
                    slotOption = CalculateSlotOption(backends, outputSlot, registry);
                    break;
            }
            outputSlot.SetTensorHandleFactory(slotOption);

            // Now determine the "best" edge strategy for each connection given the slotOption.
            unsigned int connectionIdx = 0;
            for (auto&& connection : outputSlot.GetConnections())
            {
                const Layer& connectedLayer = connection->GetOwningLayer();

                EdgeStrategy strategy = CalculateEdgeStrategy(backends, slotOption, *layer, connectedLayer,
                                                              registry, importEnabled);

                if (strategy == EdgeStrategy::Undefined)
                {
                    result.m_Error = true;
                    if (errMessages)
                    {
                        errMessages.value().emplace_back("Could not find valid strategy required for compatibility"
                                                         " between backends.");
                    }
                    return;
                }

                outputSlot.SetEdgeStrategy(connectionIdx, strategy);

                connectionIdx++;
            }
        }
    });

    return result;
}

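// Typical usage of Optimize() (illustrative sketch only; assumes a runtime has already been created and
// supplies the device spec, and that the listed backends are registered on the platform):
//
//     std::vector<armnn::BackendId> preferences = { armnn::Compute::CpuAcc, armnn::Compute::CpuRef };
//     armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*network,
//                                                          preferences,
//                                                          runtime->GetDeviceSpec(),
//                                                          armnn::OptimizerOptions());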
IOptimizedNetworkPtr Optimize(const INetwork& inNetwork,
                              const std::vector<BackendId>& backendPreferences,
                              const IDeviceSpec& deviceSpec,
                              const OptimizerOptions& options,
                              Optional<std::vector<std::string>&> messages)
{
    if (backendPreferences.empty())
    {
        throw InvalidArgumentException("Invoked Optimize with no backends specified");
    }

    if (options.m_ReduceFp32ToFp16 && options.m_ReduceFp32ToBf16)
    {
        throw InvalidArgumentException("BFloat16 and Float16 optimization cannot be enabled at the same time.");
    }

    const Network& network = *PolymorphicDowncast<const Network*>(&inNetwork);
    std::unique_ptr<Graph> graph = std::make_unique<Graph>(network.GetGraph());

    auto optNet = IOptimizedNetworkPtr(new OptimizedNetwork(std::move(graph), options.m_ModelOptions),
                                       &IOptimizedNetwork::Destroy);

    OptimizedNetwork* optNetObjPtr = PolymorphicDowncast<OptimizedNetwork*>(optNet.get());

    // Get the optimized graph
    Graph& optGraph = optNetObjPtr->GetGraph();

    // Perform AddBroadcastReshapeLayer optimisation
    using namespace optimizations;
    Optimizer::Pass(optGraph, MakeOptimizations(AddBroadcastReshapeLayer()));

    // Infer the tensor infos for all output slots. Throws an exception on failure
    optGraph.InferTensorInfos();

    // Perform optimisation passes
    Optimizer::Pass(optGraph, MakeOptimizations(SquashEqualPermuteSiblings(),
                                                SquashEqualTransposeSiblings(),
                                                SquashEqualReshapeSiblings(),
                                                OptimizeInversePermutes(),
                                                OptimizeInverseTransposes(),
                                                MovePermuteUp(),
                                                MoveTransposeUp(),
                                                PermuteAsReshape(),
                                                TransposeAsReshape(),
                                                OptimizeConsecutiveReshapes(),
                                                FoldPadIntoConvolution2d(),
                                                PermuteAndBatchToSpaceAsDepthToSpace(),
                                                TransposeAndBatchToSpaceAsDepthToSpace(),
                                                FuseBatchNormIntoConvolution2DFloat32(),
                                                FuseBatchNormIntoConvolution2DFloat16(),
                                                FuseBatchNormIntoDepthwiseConvolution2DFloat32(),
                                                FuseBatchNormIntoDepthwiseConvolution2DFloat16()));

    // If Fp32 to Fp16 optimization is set convert Fp32 network to Fp16
    if (options.m_ReduceFp32ToFp16)
    {
        Optimizer::Pass(optGraph, MakeOptimizations(Fp32NetworkToFp16Converter()));
        Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsFloatToHalf()));
    }

    // If Fp32 to Bf16 optimization is set convert Fp32 network to Bf16
    // Convert input of Convolution2d and FullyConnected from Fp32 to Bf16
    // Only Constant weight of Convolution2d and FullyConnected are converted from Fp32 to Bf16
    if (options.m_ReduceFp32ToBf16)
    {
        Optimizer::Pass(optGraph, MakeOptimizations(Fp32NetworkToBf16Converter()));
    }

    // Initialize backend settings
    BackendSettings backendSettings(backendPreferences, deviceSpec);
    if (backendSettings.GetAvailablePreferredBackends().empty())
    {
        std::stringstream failureMsg;
        failureMsg << "None of the preferred backends " << backendPreferences
                   << " are supported. Current platform provides " << backendSettings.m_SupportedBackends;
        ReportError(failureMsg.str(), messages);
        throw InvalidArgumentException(failureMsg.str());
    }

    // Create a map to temporarily hold initialized backend objects
    TensorHandleFactoryRegistry tensorHandleFactoryRegistry;
    BackendsMap backends = CreateSupportedBackends(tensorHandleFactoryRegistry, backendSettings);

    // Assign an available backend to each layer
    Graph::Iterator firstLayer = optGraph.begin();
    Graph::Iterator lastLayer  = optGraph.end();
    OptimizationResult assignBackendsResult = AssignBackends(optNetObjPtr,
                                                             backendSettings,
                                                             firstLayer,
                                                             lastLayer,
                                                             messages);
    if (assignBackendsResult.m_Error)
    {
        // Failed to assign a backend to each layer
        throw InvalidArgumentException("Failed to assign a backend to each layer");
    }

    Optimizer::Pass(optGraph, MakeOptimizations(OptimizeInverseConversionsFp16(),
                                                OptimizeInverseConversionsFp32()));

    // Apply the backend-specific optimizations
    OptimizationResult backendOptimizationResult = ApplyBackendOptimizations(optNetObjPtr,
                                                                             backendSettings,
                                                                             backends,
                                                                             options.m_ModelOptions,
                                                                             messages);
    if (backendOptimizationResult.m_Error)
    {
        // Failed to apply the backend-specific optimizations
        throw InvalidArgumentException("Failed to apply the backend-specific optimizations");
    }

    // If the debug flag is set, then insert a DebugLayer after each layer
    // Doing this after applying the backend optimizations as they might have changed some layers
    if (options.m_Debug)
    {
        Optimizer::Pass(optGraph, MakeOptimizations(InsertDebugLayer()));
    }

    // Calculate the compatibility strategies for tensor handles
    OptimizationResult strategyResult = SelectTensorHandleStrategy(optGraph,
                                                                   backends,
                                                                   tensorHandleFactoryRegistry,
                                                                   options.m_ImportEnabled,
                                                                   messages);
    if (strategyResult.m_Error)
    {
        // Failed to select the tensor handle strategies, return a null network
        return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy);
    }

    // Based on the tensor handle strategy determined above, insert copy layers where required.
    optGraph.AddCompatibilityLayers(backends, tensorHandleFactoryRegistry);

    // Convert constants
    Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsFloatToHalf()));
    Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsHalfToFloat()));

    // Run backend specific optimizations (deprecated)
    for (auto&& chosenBackend : backendSettings.m_SelectedBackends)
    {
        auto factoryFun = BackendRegistryInstance().GetFactory(chosenBackend);
        auto backendPtr = factoryFun();
        ARMNN_ASSERT(backendPtr.get() != nullptr);

        ARMNN_NO_DEPRECATE_WARN_BEGIN
        auto backendSpecificOptimizations = backendPtr->GetOptimizations();
        ARMNN_NO_DEPRECATE_WARN_END

        if (!backendSpecificOptimizations.empty())
        {
            Optimizer::Pass(optNetObjPtr->GetGraph(), backendSpecificOptimizations);
        }
    }

    return optNet;
}
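
// Reads the first network option and, when it is the "ShapeInferenceMethod" option, returns its boolean
// value; otherwise returns false.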
bool Network::GetShapeInferenceMethod()
{
    if (m_NetworkOptions.size() > 0 && m_NetworkOptions[0].GetBackendId().Get() == "ShapeInferenceMethod")
    {
        return m_NetworkOptions[0].GetOption(0).GetValue().AsBool();
    }

    return false;
}

Network::Network(NetworkOptions networkOptions)
: m_NetworkOptions(networkOptions),
  m_Graph(std::make_unique<Graph>(GetShapeInferenceMethod()))
{}

Network::~Network()
{
}

Status Network::PrintGraph()
{
    m_Graph->Print();
    return Status::Success;
}

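// Typical usage of the layer-building API below (illustrative sketch only; binding ids, names and tensor
// shapes are examples, not taken from this file):
//
//     armnn::INetworkPtr net = armnn::INetwork::Create();
//     armnn::IConnectableLayer* input  = net->AddInputLayer(0, "input");
//     armnn::IConnectableLayer* output = net->AddOutputLayer(0, "output");
//     input->GetOutputSlot(0).Connect(output->GetInputSlot(0));
//     input->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 4 }, armnn::DataType::Float32));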
IConnectableLayer* Network::AddInputLayer(LayerBindingId id, const char* name)
{
    return m_Graph->AddLayer<InputLayer>(id, name);
}

IConnectableLayer* Network::AddBatchToSpaceNdLayer(const BatchToSpaceNdDescriptor& batchToSpaceNdDescriptor,
                                                   const char* name)
{
    return m_Graph->AddLayer<BatchToSpaceNdLayer>(batchToSpaceNdDescriptor, name);
}

IConnectableLayer* Network::AddComparisonLayer(const ComparisonDescriptor& comparisonDescriptor,
                                               const char* name)
{
    return m_Graph->AddLayer<ComparisonLayer>(comparisonDescriptor, name);
}

IConnectableLayer* Network::AddElementwiseUnaryLayer(const ElementwiseUnaryDescriptor& elementwiseUnaryDescriptor,
                                                     const char* name)
{
    return m_Graph->AddLayer<ElementwiseUnaryLayer>(elementwiseUnaryDescriptor, name);
}

IConnectableLayer* Network::AddFillLayer(const FillDescriptor& fillDescriptor,
                                         const char* name)
{
    return m_Graph->AddLayer<FillLayer>(fillDescriptor, name);
}

IConnectableLayer* Network::AddFullyConnectedLayerImpl(const FullyConnectedDescriptor& fullyConnectedDescriptor,
                                                       const ConstTensor& weights,
                                                       const Optional<ConstTensor>& biases,
                                                       const char* name)
{
    if (fullyConnectedDescriptor.m_BiasEnabled && !biases.has_value())
    {
        throw InvalidArgumentException("AddFullyConnectedLayer: biases cannot be empty");
    }

    const auto layer = m_Graph->AddLayer<FullyConnectedLayer>(fullyConnectedDescriptor, name);

    layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(weights);

    if (fullyConnectedDescriptor.m_BiasEnabled)
    {
        layer->m_Bias = std::make_unique<ScopedCpuTensorHandle>(biases.value());
    }

    return layer;
}

IConnectableLayer* Network::AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor,
                                                   const ConstTensor& weights,
                                                   const Optional<ConstTensor>& biases,
                                                   const char* name)
{
    return AddFullyConnectedLayerImpl(fullyConnectedDescriptor, weights, biases, name);
}

IConnectableLayer* Network::AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor,
                                                   const ConstTensor& weights,
                                                   const char* name)
{
    Optional<ConstTensor> biases;
    return AddFullyConnectedLayerImpl(fullyConnectedDescriptor, weights, biases, name);
}

IConnectableLayer* Network::AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor,
                                                   const ConstTensor& weights,
                                                   const ConstTensor& biases,
                                                   const char* name)
{
    Optional<ConstTensor> optionalBiases(biases);
    return AddFullyConnectedLayerImpl(fullyConnectedDescriptor, weights, optionalBiases, name);
}

IConnectableLayer* Network::AddConcatLayer(const ConcatDescriptor& concatDescriptor,
                                           const char* name)
{
    return m_Graph->AddLayer<ConcatLayer>(concatDescriptor, name);
}

IConnectableLayer* Network::AddConvolution2dLayerImpl(const Convolution2dDescriptor& convolution2dDescriptor,
                                                      const ConstTensor& weights,
                                                      const Optional<ConstTensor>& biases,
                                                      const char* name)
{
    if (convolution2dDescriptor.m_BiasEnabled && !biases.has_value())
    {
        throw InvalidArgumentException("AddConvolution2dLayer: biases cannot be empty");
    }

    const auto layer = m_Graph->AddLayer<Convolution2dLayer>(convolution2dDescriptor, name);

    layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(weights);

    if (convolution2dDescriptor.m_BiasEnabled)
    {
        layer->m_Bias = std::make_unique<ScopedCpuTensorHandle>(biases.value());
    }

    return layer;
}

IConnectableLayer* Network::AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor,
                                                  const ConstTensor& weights,
                                                  const Optional<ConstTensor>& biases,
                                                  const char* name)
{
    return AddConvolution2dLayerImpl(convolution2dDescriptor, weights, biases, name);
}

IConnectableLayer* Network::AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor,
                                                  const ConstTensor& weights,
                                                  const char* name)
{
    Optional<ConstTensor> biases;
    return AddConvolution2dLayerImpl(convolution2dDescriptor, weights, biases, name);
}

IConnectableLayer* Network::AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor,
                                                  const ConstTensor& weights,
                                                  const ConstTensor& biases,
                                                  const char* name)
{
    Optional<ConstTensor> optionalBiases(biases);
    return AddConvolution2dLayerImpl(convolution2dDescriptor, weights, optionalBiases, name);
}

IConnectableLayer* Network::AddDepthwiseConvolution2dLayerImpl(
    const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
    const ConstTensor& weights,
    const Optional<ConstTensor>& biases,
    const char* name)
{
    if (convolution2dDescriptor.m_BiasEnabled && !biases.has_value())
    {
        throw InvalidArgumentException("AddDepthwiseConvolution2dLayer: biases cannot be empty");
    }

    const auto layer = m_Graph->AddLayer<DepthwiseConvolution2dLayer>(convolution2dDescriptor, name);

    layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(weights);

    if (convolution2dDescriptor.m_BiasEnabled)
    {
        layer->m_Bias = std::make_unique<ScopedCpuTensorHandle>(biases.value());
    }

    return layer;
}

IConnectableLayer* Network::AddDepthToSpaceLayer(const DepthToSpaceDescriptor& depthToSpaceDescriptor,
                                                 const char* name)
{
    return m_Graph->AddLayer<DepthToSpaceLayer>(depthToSpaceDescriptor, name);
}

IConnectableLayer* Network::AddDepthwiseConvolution2dLayer(
    const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
    const ConstTensor& weights,
    const Optional<ConstTensor>& biases,
    const char* name)
{
    return AddDepthwiseConvolution2dLayerImpl(convolution2dDescriptor, weights, biases, name);
}

IConnectableLayer* Network::AddDepthwiseConvolution2dLayer(
    const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
    const ConstTensor& weights,
    const char* name)
{
    Optional<ConstTensor> biases;
    return AddDepthwiseConvolution2dLayerImpl(convolution2dDescriptor, weights, biases, name);
}

IConnectableLayer* Network::AddDepthwiseConvolution2dLayer(
    const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
    const ConstTensor& weights,
    const ConstTensor& biases,
    const char* name)
{
    Optional<ConstTensor> optionalBiases(biases);
    return AddDepthwiseConvolution2dLayerImpl(convolution2dDescriptor, weights, optionalBiases, name);
}

IConnectableLayer* Network::AddDetectionPostProcessLayer(const armnn::DetectionPostProcessDescriptor& descriptor,
                                                         const ConstTensor& anchors, const char* name)
{
    const auto layer = m_Graph->AddLayer<DetectionPostProcessLayer>(descriptor, name);

    layer->m_Anchors = std::make_unique<ScopedCpuTensorHandle>(anchors);

    return layer;
}

AddPermuteLayer(const PermuteDescriptor & permuteDescriptor,const char * name)1388 IConnectableLayer* Network::AddPermuteLayer(const PermuteDescriptor& permuteDescriptor,
1389 const char* name)
1390 {
1391 return m_Graph->AddLayer<PermuteLayer>(permuteDescriptor, name);
1392 }
1393
AddPooling2dLayer(const Pooling2dDescriptor & pooling2dDescriptor,const char * name)1394 IConnectableLayer* Network::AddPooling2dLayer(const Pooling2dDescriptor& pooling2dDescriptor,
1395 const char* name)
1396 {
1397 return m_Graph->AddLayer<Pooling2dLayer>(pooling2dDescriptor, name);
1398 }
1399
AddActivationLayer(const ActivationDescriptor & activationDescriptor,const char * name)1400 IConnectableLayer* Network::AddActivationLayer(const ActivationDescriptor& activationDescriptor,
1401 const char* name)
1402 {
1403 return m_Graph->AddLayer<ActivationLayer>(activationDescriptor, name);
1404 }
1405
AddArgMinMaxLayer(const ArgMinMaxDescriptor & argMinMaxDescriptor,const char * name)1406 IConnectableLayer* Network::AddArgMinMaxLayer(const ArgMinMaxDescriptor& argMinMaxDescriptor,
1407 const char* name)
1408 {
1409 return m_Graph->AddLayer<ArgMinMaxLayer>(argMinMaxDescriptor, name);
1410 }
1411
AddNormalizationLayer(const NormalizationDescriptor & normalizationDescriptor,const char * name)1412 IConnectableLayer* Network::AddNormalizationLayer(const NormalizationDescriptor&
1413 normalizationDescriptor,
1414 const char* name)
1415 {
1416 return m_Graph->AddLayer<NormalizationLayer>(normalizationDescriptor, name);
1417 }
1418
AddSliceLayer(const SliceDescriptor & sliceDescriptor,const char * name)1419 IConnectableLayer* Network::AddSliceLayer(const SliceDescriptor& sliceDescriptor, const char* name)
1420 {
1421 return m_Graph->AddLayer<SliceLayer>(sliceDescriptor, name);
1422 }
1423
AddSoftmaxLayer(const SoftmaxDescriptor & softmaxDescriptor,const char * name)1424 IConnectableLayer* Network::AddSoftmaxLayer(const SoftmaxDescriptor& softmaxDescriptor,
1425 const char* name)
1426 {
1427 return m_Graph->AddLayer<SoftmaxLayer>(softmaxDescriptor, name);
1428 }
1429
AddSplitterLayer(const ViewsDescriptor & splitterDescriptor,const char * name)1430 IConnectableLayer* Network::AddSplitterLayer(const ViewsDescriptor& splitterDescriptor,
1431 const char* name)
1432 {
1433 return m_Graph->AddLayer<SplitterLayer>(splitterDescriptor, name);
1434 }
1435
AddMaximumLayer(const char * name)1436 IConnectableLayer* Network::AddMaximumLayer(const char* name)
1437 {
1438 return m_Graph->AddLayer<MaximumLayer>(name);
1439 }
1440
AddMinimumLayer(const char * name)1441 IConnectableLayer* Network::AddMinimumLayer(const char* name)
1442 {
1443 return m_Graph->AddLayer<MinimumLayer>(name);
1444 }
1445
AddMergerLayer(const MergerDescriptor & mergerDescriptor,const char * name)1446 IConnectableLayer* Network::AddMergerLayer(const MergerDescriptor& mergerDescriptor,
1447 const char* name)
1448 {
1449 return AddConcatLayer(mergerDescriptor, name);
1450 }
1451
AddAbsLayer(const char * name)1452 IConnectableLayer* Network::AddAbsLayer(const char * name)
1453 {
1454 return AddElementwiseUnaryLayer(ElementwiseUnaryDescriptor(UnaryOperation::Abs), name);
1455 }
1456
AddAdditionLayer(const char * name)1457 IConnectableLayer* Network::AddAdditionLayer(const char* name)
1458 {
1459 return m_Graph->AddLayer<AdditionLayer>(name);
1460 }
1461
AddMultiplicationLayer(const char * name)1462 IConnectableLayer* Network::AddMultiplicationLayer(const char* name)
1463 {
1464 return m_Graph->AddLayer<MultiplicationLayer>(name);
1465 }
1466
AddOutputLayer(LayerBindingId id,const char * name)1467 IConnectableLayer* Network::AddOutputLayer(LayerBindingId id, const char* name)
1468 {
1469 return m_Graph->AddLayer<OutputLayer>(id, name);
1470 }
1471
AddBatchNormalizationLayer(const BatchNormalizationDescriptor & desc,const ConstTensor & mean,const ConstTensor & variance,const ConstTensor & beta,const ConstTensor & gamma,const char * name)1472 IConnectableLayer* Network::AddBatchNormalizationLayer(const BatchNormalizationDescriptor& desc,
1473 const ConstTensor& mean,
1474 const ConstTensor& variance,
1475 const ConstTensor& beta,
1476 const ConstTensor& gamma,
1477 const char* name)
1478 {
1479 const auto layer = m_Graph->AddLayer<BatchNormalizationLayer>(desc, name);
1480
1481 layer->m_Mean = std::make_unique<ScopedCpuTensorHandle>(mean);
1482 layer->m_Variance = std::make_unique<ScopedCpuTensorHandle>(variance);
1483 layer->m_Beta = std::make_unique<ScopedCpuTensorHandle>(beta);
1484 layer->m_Gamma = std::make_unique<ScopedCpuTensorHandle>(gamma);
1485
1486 return layer;
1487 }
1488
AddRankLayer(const char * name)1489 IConnectableLayer* Network::AddRankLayer(const char* name)
1490 {
1491 return m_Graph->AddLayer<RankLayer>(name);
1492 }
1493
AddResizeBilinearLayer(const ResizeBilinearDescriptor & descriptor,const char * name)1494 IConnectableLayer* Network::AddResizeBilinearLayer(const ResizeBilinearDescriptor& descriptor,
1495 const char* name)
1496 {
1497 ResizeDescriptor resizeDescriptor;
1498 resizeDescriptor.m_Method = ResizeMethod::Bilinear;
1499 resizeDescriptor.m_DataLayout = descriptor.m_DataLayout;
1500 resizeDescriptor.m_TargetWidth = descriptor.m_TargetWidth;
1501 resizeDescriptor.m_TargetHeight = descriptor.m_TargetHeight;
1502 resizeDescriptor.m_AlignCorners = descriptor.m_AlignCorners;
1503 resizeDescriptor.m_HalfPixelCenters = descriptor.m_HalfPixelCenters;
1504
1505 return m_Graph->AddLayer<ResizeLayer>(resizeDescriptor, name);
1506 }
1507
AddResizeLayer(const ResizeDescriptor & resizeDescriptor,const char * name)1508 IConnectableLayer* Network::AddResizeLayer(const ResizeDescriptor&
1509 resizeDescriptor, const char* name)
1510 {
1511 return m_Graph->AddLayer<ResizeLayer>(resizeDescriptor, name);
1512 }
1513
AddInstanceNormalizationLayer(const InstanceNormalizationDescriptor & desc,const char * name)1514 IConnectableLayer* Network::AddInstanceNormalizationLayer(const InstanceNormalizationDescriptor& desc,
1515 const char* name)
1516 {
1517 return m_Graph->AddLayer<InstanceNormalizationLayer>(desc, name);
1518 }
1519
AddL2NormalizationLayer(const L2NormalizationDescriptor & desc,const char * name)1520 IConnectableLayer* Network::AddL2NormalizationLayer(const L2NormalizationDescriptor& desc,
1521 const char* name)
1522 {
1523 return m_Graph->AddLayer<L2NormalizationLayer>(desc, name);
1524 }
1525
AddLogSoftmaxLayer(const LogSoftmaxDescriptor & desc,const char * name)1526 IConnectableLayer* Network::AddLogSoftmaxLayer(const LogSoftmaxDescriptor& desc,
1527 const char* name)
1528 {
1529 return m_Graph->AddLayer<LogSoftmaxLayer>(desc, name);
1530 }
1531
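// The constant tensor is copied into a handle owned by the layer and backs the layer's
// single output slot.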
AddConstantLayer(const ConstTensor & input,const char * name)1532 IConnectableLayer* Network::AddConstantLayer(const ConstTensor& input, const char* name)
1533 {
1534 auto layer = m_Graph->AddLayer<ConstantLayer>(name);
1535
1536 layer->m_LayerOutput = std::make_unique<ScopedCpuTensorHandle>(input);
1537
1538 return layer;
1539 }
1540
AddReshapeLayer(const ReshapeDescriptor & reshapeDescriptor,const char * name)1541 IConnectableLayer* Network::AddReshapeLayer(const ReshapeDescriptor& reshapeDescriptor,
1542 const char* name)
1543 {
1544 return m_Graph->AddLayer<ReshapeLayer>(reshapeDescriptor, name);
1545 }
1546
AddSpaceToBatchNdLayer(const SpaceToBatchNdDescriptor & spaceToBatchNdDescriptor,const char * name)1547 IConnectableLayer* Network::AddSpaceToBatchNdLayer(const SpaceToBatchNdDescriptor& spaceToBatchNdDescriptor,
1548 const char* name)
1549 {
1550 return m_Graph->AddLayer<SpaceToBatchNdLayer>(spaceToBatchNdDescriptor, name);
1551 }
1552
AddSpaceToDepthLayer(const SpaceToDepthDescriptor & spaceToDepthDescriptor,const char * name)1553 IConnectableLayer* Network::AddSpaceToDepthLayer(const SpaceToDepthDescriptor& spaceToDepthDescriptor,
1554 const char* name)
1555 {
1556 return m_Graph->AddLayer<SpaceToDepthLayer>(spaceToDepthDescriptor, name);
1557 }
1558
AddFloorLayer(const char * name)1559 IConnectableLayer* Network::AddFloorLayer(const char* name)
1560 {
1561 return m_Graph->AddLayer<FloorLayer>(name);
1562 }
1563
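// Creates an LstmLayer and copies the supplied weights and biases into layer-owned
// handles. The basic parameter set (input/recurrent weights and biases for the forget,
// cell and output gates) is always required; the CIFG, projection, peephole and
// layer-normalization groups are validated and copied only when the corresponding
// descriptor flag enables them, and an InvalidArgumentException is thrown if a required
// tensor is missing.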
AddLstmLayer(const LstmDescriptor & descriptor,const LstmInputParams & params,const char * name)1564 IConnectableLayer* Network::AddLstmLayer(const LstmDescriptor& descriptor,
1565 const LstmInputParams& params,
1566 const char* name)
1567 {
1568 const auto layer = m_Graph->AddLayer<LstmLayer>(descriptor, name);
1569
1570 //Lstm Basic Parameters
1571 layer->m_BasicParameters.m_InputToForgetWeights =
1572 std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToForgetWeights));
1573 layer->m_BasicParameters.m_InputToCellWeights =
1574 std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToCellWeights));
1575 layer->m_BasicParameters.m_InputToOutputWeights =
1576 std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToOutputWeights));
1577 layer->m_BasicParameters.m_RecurrentToForgetWeights =
1578 std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToForgetWeights));
1579 layer->m_BasicParameters.m_RecurrentToCellWeights =
1580 std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToCellWeights));
1581 layer->m_BasicParameters.m_RecurrentToOutputWeights =
1582 std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToOutputWeights));
1583 layer->m_BasicParameters.m_ForgetGateBias =
1584 std::make_unique<ScopedCpuTensorHandle>(*(params.m_ForgetGateBias));
1585 layer->m_BasicParameters.m_CellBias =
1586 std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellBias));
1587 layer->m_BasicParameters.m_OutputGateBias =
1588 std::make_unique<ScopedCpuTensorHandle>(*(params.m_OutputGateBias));
1589
1590 //Lstm Cifg parameters
1591 if(!descriptor.m_CifgEnabled)
1592 {
1593 if(params.m_InputToInputWeights == nullptr)
1594 {
1595 throw InvalidArgumentException("AddLstmLayer: Input To Input Weights cannot be NULL "
1596 "when CIFG is disabled.");
1597 }
1598 if(params.m_RecurrentToInputWeights == nullptr)
1599 {
1600 throw InvalidArgumentException(
1601 "AddLstmLayer: Recurrent To Input Weights cannot be NULL "
1602 "when CIFG is disabled.");
1603 }
1604 if(params.m_InputGateBias == nullptr)
1605 {
1606 throw InvalidArgumentException("AddLstmLayer: Input Gate Bias cannot be NULL "
1607 "when CIFG is disabled.");
1608 }
1609 layer->m_CifgParameters.m_InputToInputWeights =
1610 std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToInputWeights));
1611 layer->m_CifgParameters.m_RecurrentToInputWeights =
1612 std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToInputWeights));
1613 layer->m_CifgParameters.m_InputGateBias =
1614 std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputGateBias));
1615 }
1616
1617 //Lstm projection parameters
1618 if(descriptor.m_ProjectionEnabled)
1619 {
1620 if(params.m_ProjectionWeights == nullptr)
1621 {
1622 throw InvalidArgumentException("AddLstmLayer: Projection Weights cannot be NULL "
1623 "when projection is enabled.");
1624 }
1625 layer->m_ProjectionParameters.m_ProjectionWeights =
1626 std::make_unique<ScopedCpuTensorHandle>(*(params.m_ProjectionWeights));
1627 if(params.m_ProjectionBias != nullptr)
1628 {
1629 layer->m_ProjectionParameters.m_ProjectionBias =
1630 std::make_unique<ScopedCpuTensorHandle>(*(params.m_ProjectionBias));
1631 }
1632 }
1633
1634 //Lstm Peephole params
1635 if(descriptor.m_PeepholeEnabled)
1636 {
1637 if(!descriptor.m_CifgEnabled)
1638 {
1639 if(params.m_CellToInputWeights == nullptr)
1640 {
1641 throw InvalidArgumentException("AddLstmLayer: Cell To Input Weights cannot be NULL "
1642 "when Peephole is enabled and CIFG disabled.");
1643 }
1644
1645 layer->m_PeepholeParameters.m_CellToInputWeights =
1646 std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellToInputWeights));
1647 }
1648
1649 if(params.m_CellToForgetWeights == nullptr)
1650 {
1651 throw InvalidArgumentException("AddLstmLayer: Cell To Forget Weights cannot be NULL "
1652 "when Peephole is enabled.");
1653 }
1654 if(params.m_CellToOutputWeights == nullptr)
1655 {
1656 throw InvalidArgumentException("AddLstmLayer: Cell To Output Weights cannot be NULL "
1657 "when Peephole is enabled.");
1658 }
1659
1660 layer->m_PeepholeParameters.m_CellToForgetWeights =
1661 std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellToForgetWeights));
1662 layer->m_PeepholeParameters.m_CellToOutputWeights =
1663 std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellToOutputWeights));
1664 }
1665
1666 //Lstm Layer Normalization params
1667 if(descriptor.m_LayerNormEnabled)
1668 {
1669 if(!descriptor.m_CifgEnabled)
1670 {
1671 if(params.m_InputLayerNormWeights == nullptr)
1672 {
1673 throw InvalidArgumentException("AddLstmLayer: Input layer normalization weights cannot be NULL "
1674 "when layer normalization is enabled and CIFG disabled.");
1675 }
1676 layer->m_LayerNormParameters.m_InputLayerNormWeights =
1677 std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputLayerNormWeights));
1678 }
1679
1680 if(params.m_ForgetLayerNormWeights == nullptr)
1681 {
1682 throw InvalidArgumentException("AddLstmLayer: Forget layer normalization weights cannot be NULL "
1683 "when layer normalization is enabled.");
1684 }
1685 if(params.m_CellLayerNormWeights == nullptr)
1686 {
1687 throw InvalidArgumentException("AddLstmLayer: Cell layer normalization weights cannot be NULL "
1688 "when layer normalization is enabled.");
1689 }
1690 if(params.m_OutputLayerNormWeights == nullptr)
1691 {
1692 throw InvalidArgumentException("AddLstmLayer: Output layer normalization weights cannot be NULL "
1693 "when layer normalization is enabled.");
1694 }
1695 layer->m_LayerNormParameters.m_ForgetLayerNormWeights =
1696 std::make_unique<ScopedCpuTensorHandle>(*(params.m_ForgetLayerNormWeights));
1697 layer->m_LayerNormParameters.m_CellLayerNormWeights =
1698 std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellLayerNormWeights));
1699 layer->m_LayerNormParameters.m_OutputLayerNormWeights =
1700 std::make_unique<ScopedCpuTensorHandle>(*(params.m_OutputLayerNormWeights));
1701 }
1702 return layer;
1703 }
1704
AddDivisionLayer(const char * name)1705 IConnectableLayer* Network::AddDivisionLayer(const char* name)
1706 {
1707 return m_Graph->AddLayer<DivisionLayer>(name);
1708 }
1709
AddSubtractionLayer(const char * name)1710 IConnectableLayer* Network::AddSubtractionLayer(const char* name)
1711 {
1712 return m_Graph->AddLayer<SubtractionLayer>(name);
1713 }
1714
AddMeanLayer(const MeanDescriptor & meanDescriptor,const char * name)1715 IConnectableLayer* Network::AddMeanLayer(const MeanDescriptor& meanDescriptor, const char* name)
1716 {
1717     return m_Graph->AddLayer<MeanLayer>(meanDescriptor, name);
1718 }
1719
AddPadLayer(const PadDescriptor & padDescriptor,const char * name)1720 IConnectableLayer* Network::AddPadLayer(const PadDescriptor& padDescriptor, const char* name)
1721 {
1722     return m_Graph->AddLayer<PadLayer>(padDescriptor, name);
1723 }
1724
AddQuantizeLayer(const char * name)1725 IConnectableLayer* Network::AddQuantizeLayer(const char* name)
1726 {
1727 return m_Graph->AddLayer<QuantizeLayer>(name);
1728 }
1729
AddDequantizeLayer(const char * name)1730 IConnectableLayer* Network::AddDequantizeLayer(const char* name)
1731 {
1732 return m_Graph->AddLayer<DequantizeLayer>(name);
1733 }
1734
AddStridedSliceLayer(const StridedSliceDescriptor & stridedSliceDescriptor,const char * name)1735 IConnectableLayer* Network::AddStridedSliceLayer(const StridedSliceDescriptor& stridedSliceDescriptor,
1736 const char* name)
1737 {
1738 return m_Graph->AddLayer<StridedSliceLayer>(stridedSliceDescriptor, name);
1739 }
1740
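// Greater and Equal are expressed through the generic Comparison layer with the
// corresponding ComparisonOperation.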
AddGreaterLayer(const char * name)1741 IConnectableLayer* Network::AddGreaterLayer(const char* name)
1742 {
1743 return AddComparisonLayer(ComparisonDescriptor(ComparisonOperation::Greater), name);
1744 }
1745
AddEqualLayer(const char * name)1746 IConnectableLayer* Network::AddEqualLayer(const char* name)
1747 {
1748 return AddComparisonLayer(ComparisonDescriptor(ComparisonOperation::Equal), name);
1749 }
1750
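// Rsqrt is likewise routed through the generic ElementwiseUnary layer.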
AddRsqrtLayer(const char * name)1751 IConnectableLayer* Network::AddRsqrtLayer(const char * name)
1752 {
1753 return AddElementwiseUnaryLayer(ElementwiseUnaryDescriptor(UnaryOperation::Rsqrt), name);
1754 }
1755
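// Overload without a descriptor: delegates to the descriptor-taking overload with a
// default-constructed GatherDescriptor.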
AddGatherLayer(const char * name)1756 IConnectableLayer* Network::AddGatherLayer(const char* name)
1757 {
1758 GatherDescriptor gatherDescriptor{};
1759 return AddGatherLayer(gatherDescriptor, name);
1760 }
1761
AddGatherLayer(const GatherDescriptor & gatherDescriptor,const char * name)1762 IConnectableLayer* Network::AddGatherLayer(const GatherDescriptor& gatherDescriptor,
1763 const char* name)
1764 {
1765 return m_Graph->AddLayer<GatherLayer>(gatherDescriptor, name);
1766 }
1767
AddMergeLayer(const char * name)1768 IConnectableLayer* Network::AddMergeLayer(const char* name)
1769 {
1770 return m_Graph->AddLayer<MergeLayer>(name);
1771 }
1772
AddSwitchLayer(const char * name)1773 IConnectableLayer* Network::AddSwitchLayer(const char* name)
1774 {
1775 return m_Graph->AddLayer<SwitchLayer>(name);
1776 }
1777
AddPreluLayer(const char * name)1778 IConnectableLayer* Network::AddPreluLayer(const char* name)
1779 {
1780 return m_Graph->AddLayer<PreluLayer>(name);
1781 }
1782
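// Weights (and the optional bias, when m_BiasEnabled is set) are copied into handles
// owned by the layer; passing an empty bias with m_BiasEnabled set is rejected.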
AddTransposeConvolution2dLayer(const TransposeConvolution2dDescriptor & descriptor,const ConstTensor & weights,const Optional<ConstTensor> & biases,const char * name)1783 IConnectableLayer* Network::AddTransposeConvolution2dLayer(const TransposeConvolution2dDescriptor& descriptor,
1784 const ConstTensor& weights,
1785 const Optional<ConstTensor>& biases,
1786 const char* name)
1787 {
1788 if (descriptor.m_BiasEnabled && !biases.has_value())
1789 {
1790 throw InvalidArgumentException("AddTransposeConvolution2dLayer: Biases cannot be empty");
1791 }
1792
1793 const auto layer = m_Graph->AddLayer<TransposeConvolution2dLayer>(descriptor, name);
1794
1795 layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(weights);
1796
1797 if (descriptor.m_BiasEnabled)
1798 {
1799 layer->m_Bias = std::make_unique<ScopedCpuTensorHandle>(biases.value());
1800 }
1801
1802 return layer;
1803 }
1804
AddTransposeLayer(const TransposeDescriptor & transposeDescriptor,const char * name)1805 IConnectableLayer* Network::AddTransposeLayer(const TransposeDescriptor& transposeDescriptor,
1806 const char* name)
1807 {
1808 return m_Graph->AddLayer<TransposeLayer>(transposeDescriptor, name);
1809 }
1810
AddStackLayer(const StackDescriptor & stackDescriptor,const char * name)1811 IConnectableLayer* Network::AddStackLayer(const StackDescriptor& stackDescriptor,
1812 const char* name)
1813 {
1814 return m_Graph->AddLayer<StackLayer>(stackDescriptor, name);
1815 }
1816
AddStandInLayer(const StandInDescriptor & desc,const char * name)1818 IConnectableLayer* Network::AddStandInLayer(const StandInDescriptor& desc,
1819 const char* name)
1820 {
1821 return m_Graph->AddLayer<StandInLayer>(desc, name);
1822 }
1823
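// The quantized LSTM takes every weight and bias as a mandatory input; each tensor is
// copied out of the QuantizedLstmInputParams accessors into layer-owned handles.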
AddQuantizedLstmLayer(const QuantizedLstmInputParams & params,const char * name)1824 IConnectableLayer* Network::AddQuantizedLstmLayer(const QuantizedLstmInputParams& params,
1825 const char* name)
1826 {
1827 const auto layer = m_Graph->AddLayer<QuantizedLstmLayer>(name);
1828
1829 // InputToX weights
1830 layer->m_QuantizedLstmParameters.m_InputToInputWeights =
1831 std::make_unique<ScopedCpuTensorHandle>(params.GetInputToInputWeights());
1832 layer->m_QuantizedLstmParameters.m_InputToForgetWeights =
1833 std::make_unique<ScopedCpuTensorHandle>(params.GetInputToForgetWeights());
1834 layer->m_QuantizedLstmParameters.m_InputToCellWeights =
1835 std::make_unique<ScopedCpuTensorHandle>(params.GetInputToCellWeights());
1836 layer->m_QuantizedLstmParameters.m_InputToOutputWeights =
1837 std::make_unique<ScopedCpuTensorHandle>(params.GetInputToOutputWeights());
1838
1839 // RecurrentToX weights
1840 layer->m_QuantizedLstmParameters.m_RecurrentToInputWeights =
1841 std::make_unique<ScopedCpuTensorHandle>(params.GetRecurrentToInputWeights());
1842 layer->m_QuantizedLstmParameters.m_RecurrentToForgetWeights =
1843 std::make_unique<ScopedCpuTensorHandle>(params.GetRecurrentToForgetWeights());
1844 layer->m_QuantizedLstmParameters.m_RecurrentToCellWeights =
1845 std::make_unique<ScopedCpuTensorHandle>(params.GetRecurrentToCellWeights());
1846 layer->m_QuantizedLstmParameters.m_RecurrentToOutputWeights =
1847 std::make_unique<ScopedCpuTensorHandle>(params.GetRecurrentToOutputWeights());
1848
1849 // Bias
1850 layer->m_QuantizedLstmParameters.m_InputGateBias =
1851 std::make_unique<ScopedCpuTensorHandle>(params.GetInputGateBias());
1852 layer->m_QuantizedLstmParameters.m_ForgetGateBias =
1853 std::make_unique<ScopedCpuTensorHandle>(params.GetForgetGateBias());
1854 layer->m_QuantizedLstmParameters.m_CellBias =
1855 std::make_unique<ScopedCpuTensorHandle>(params.GetCellBias());
1856 layer->m_QuantizedLstmParameters.m_OutputGateBias =
1857 std::make_unique<ScopedCpuTensorHandle>(params.GetOutputGateBias());
1858
1859 return layer;
1860 }
1861
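// QLstm follows the same pattern as AddLstmLayer: the basic weights and biases are always
// copied, while the CIFG, projection, peephole and layer-normalization groups are
// validated against the QLstmDescriptor flags and copied only when enabled.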
AddQLstmLayer(const QLstmDescriptor & descriptor,const LstmInputParams & params,const char * name)1862 IConnectableLayer* Network::AddQLstmLayer(const QLstmDescriptor& descriptor,
1863 const LstmInputParams& params,
1864 const char* name)
1865 {
1866 const auto layer = m_Graph->AddLayer<QLstmLayer>(descriptor, name);
1867
1868 // QLstm Basic Parameters
1869 layer->m_BasicParameters.m_InputToForgetWeights =
1870 std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToForgetWeights));
1871 layer->m_BasicParameters.m_InputToCellWeights =
1872 std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToCellWeights));
1873 layer->m_BasicParameters.m_InputToOutputWeights =
1874 std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToOutputWeights));
1875 layer->m_BasicParameters.m_RecurrentToForgetWeights =
1876 std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToForgetWeights));
1877 layer->m_BasicParameters.m_RecurrentToCellWeights =
1878 std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToCellWeights));
1879 layer->m_BasicParameters.m_RecurrentToOutputWeights =
1880 std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToOutputWeights));
1881 layer->m_BasicParameters.m_ForgetGateBias =
1882 std::make_unique<ScopedCpuTensorHandle>(*(params.m_ForgetGateBias));
1883 layer->m_BasicParameters.m_CellBias =
1884 std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellBias));
1885 layer->m_BasicParameters.m_OutputGateBias =
1886 std::make_unique<ScopedCpuTensorHandle>(*(params.m_OutputGateBias));
1887
1888 // QLstm Cifg parameters
1889 if(!descriptor.m_CifgEnabled)
1890 {
1891 if(params.m_InputToInputWeights == nullptr)
1892 {
1893 throw InvalidArgumentException("AddQLstmLayer: Input To Input Weights cannot be NULL");
1894 }
1895
1896 if(params.m_RecurrentToInputWeights == nullptr)
1897 {
1898 throw InvalidArgumentException(
1899 "AddQLstmLayer: Recurrent To Input Weights cannot be NULL");
1900 }
1901
1902 if(params.m_InputGateBias == nullptr)
1903 {
1904 throw InvalidArgumentException("AddQLstmLayer: Input Gate Bias cannot be NULL");
1905 }
1906
1907 layer->m_CifgParameters.m_InputToInputWeights =
1908 std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputToInputWeights));
1909 layer->m_CifgParameters.m_RecurrentToInputWeights =
1910 std::make_unique<ScopedCpuTensorHandle>(*(params.m_RecurrentToInputWeights));
1911 layer->m_CifgParameters.m_InputGateBias =
1912 std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputGateBias));
1913 }
1914
1915 // QLstm Projection parameters
1916 if(descriptor.m_ProjectionEnabled)
1917 {
1918 if(params.m_ProjectionWeights == nullptr)
1919 {
1920 throw InvalidArgumentException("AddQLstmLayer: Projection Weights cannot be NULL");
1921 }
1922
1923 layer->m_ProjectionParameters.m_ProjectionWeights =
1924 std::make_unique<ScopedCpuTensorHandle>(*(params.m_ProjectionWeights));
1925
1926         // Projection bias is optional even if projection is enabled
1927         if(params.m_ProjectionBias != nullptr)
1928 {
1929 layer->m_ProjectionParameters.m_ProjectionBias =
1930 std::make_unique<ScopedCpuTensorHandle>(*(params.m_ProjectionBias));
1931 }
1933 }
1934
1935 // QLstm Peephole params
1936 if(descriptor.m_PeepholeEnabled)
1937 {
1938 if(params.m_CellToForgetWeights == nullptr)
1939 {
1940 throw InvalidArgumentException("AddQLstmLayer: Cell To Forget Weights cannot be NULL");
1941 }
1942
1943 if(params.m_CellToOutputWeights == nullptr)
1944 {
1945 throw InvalidArgumentException("AddQLstmLayer: Cell To Output Weights cannot be NULL");
1946 }
1947
1948 if(!descriptor.m_CifgEnabled)
1949 {
1950 if(params.m_CellToInputWeights == nullptr)
1951 {
1952 throw InvalidArgumentException("AddQLstmLayer: Cell To Input Weights cannot be NULL");
1953 }
1954
1955 layer->m_PeepholeParameters.m_CellToInputWeights =
1956 std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellToInputWeights));
1957 }
1958
1959 layer->m_PeepholeParameters.m_CellToForgetWeights =
1960 std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellToForgetWeights));
1961 layer->m_PeepholeParameters.m_CellToOutputWeights =
1962 std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellToOutputWeights));
1963 }
1964
1965 // QLstm Layer Normalization params
1966 if(descriptor.m_LayerNormEnabled)
1967 {
1968 if(params.m_ForgetLayerNormWeights == nullptr)
1969 {
1970 throw InvalidArgumentException("AddQLstmLayer: Forget layer normalization weights cannot be NULL");
1971 }
1972
1973 if(params.m_CellLayerNormWeights == nullptr)
1974 {
1975 throw InvalidArgumentException("AddQLstmLayer: Cell layer normalization weights cannot be NULL");
1976 }
1977
1978 if(params.m_OutputLayerNormWeights == nullptr)
1979 {
1980 throw InvalidArgumentException("AddQLstmLayer: Output layer normalization weights cannot be NULL");
1981 }
1982
1983 if(!descriptor.m_CifgEnabled)
1984 {
1985 if(params.m_InputLayerNormWeights == nullptr)
1986 {
1987 throw InvalidArgumentException("AddQLstmLayer: Input layer normalization weights cannot be NULL");
1988 }
1989
1990 layer->m_LayerNormParameters.m_InputLayerNormWeights =
1991 std::make_unique<ScopedCpuTensorHandle>(*(params.m_InputLayerNormWeights));
1992 }
1993
1994 layer->m_LayerNormParameters.m_ForgetLayerNormWeights =
1995 std::make_unique<ScopedCpuTensorHandle>(*(params.m_ForgetLayerNormWeights));
1996 layer->m_LayerNormParameters.m_CellLayerNormWeights =
1997 std::make_unique<ScopedCpuTensorHandle>(*(params.m_CellLayerNormWeights));
1998 layer->m_LayerNormParameters.m_OutputLayerNormWeights =
1999 std::make_unique<ScopedCpuTensorHandle>(*(params.m_OutputLayerNormWeights));
2000 }
2001 return layer;
2002 }
2003
AddLogicalBinaryLayer(const LogicalBinaryDescriptor & logicalBinaryDescriptor,const char * name)2004 IConnectableLayer* Network::AddLogicalBinaryLayer(const LogicalBinaryDescriptor& logicalBinaryDescriptor,
2005 const char* name)
2006 {
2007 return m_Graph->AddLayer<LogicalBinaryLayer>(logicalBinaryDescriptor, name);
2008 }
2009
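// Forwards the visitor to every layer in the graph, e.g. for serialization or inspection
// via a custom ILayerVisitor.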
Accept(ILayerVisitor & visitor) const2010 void Network::Accept(ILayerVisitor& visitor) const
2011 {
2012 for (auto layer : GetGraph())
2013 {
2014 layer->Accept(visitor);
2015     }
2016 }
2017
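// Each OptimizedNetwork takes ownership of its graph and is assigned a unique profiling
// GUID from the ProfilingService.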
OptimizedNetwork(std::unique_ptr<Graph> graph)2018 OptimizedNetwork::OptimizedNetwork(std::unique_ptr<Graph> graph)
2019 : m_Graph(std::move(graph)), m_Guid(profiling::ProfilingService::GetNextGuid())
2020 {
2021 }
2022
OptimizedNetwork(std::unique_ptr<Graph> graph,const ModelOptions & modelOptions)2023 OptimizedNetwork::OptimizedNetwork(std::unique_ptr<Graph> graph, const ModelOptions& modelOptions)
2024 : m_Graph(std::move(graph)), m_Guid(profiling::ProfilingService::GetNextGuid()), m_ModelOptions(modelOptions)
2025 {
2026 }
2027
~OptimizedNetwork()2028 OptimizedNetwork::~OptimizedNetwork()
2029 {
2030 }
2031
2032 } // namespace armnn
2033