//
// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "Network.hpp"
#include "Graph.hpp"
#include "Layer.hpp"
#include "DeviceSpec.hpp"
#include "Optimizer.hpp"
#include "SubgraphViewSelector.hpp"
#include "BackendSettings.hpp"
#include "optimizations/All.hpp"
#include "armnnUtils/Filesystem.hpp"
#include "armnn/utility/Timer.hpp"

#include <armnn/backends/TensorHandle.hpp>
#include <armnn/backends/WorkloadFactory.hpp>
#include <armnn/backends/IBackendInternal.hpp>
#include <backendsCommon/TensorHandleFactoryRegistry.hpp>

#include <armnn/Exceptions.hpp>
#include <armnn/TypesUtils.hpp>
#include <armnn/BackendRegistry.hpp>
#include <armnn/Logging.hpp>
#include <armnn/utility/Assert.hpp>
#include <armnn/utility/IgnoreUnused.hpp>
#include <armnn/utility/PolymorphicDowncast.hpp>

#include <client/include/IProfilingService.hpp>

#include <common/include/ProfilingGuid.hpp>

#include <fmt/format.h>

#include <fcntl.h>
#include <algorithm>
#include <memory>
#include <vector>

namespace armnn
{

INetwork::INetwork(NetworkOptions networkOptions) : pNetworkImpl(new NetworkImpl(networkOptions)) {}

INetwork::~INetwork() = default;

OptimizerOptionsOpaque::OptimizerOptionsOpaque()
    : p_OptimizerOptionsImpl(std::make_unique<OptimizerOptionsOpaqueImpl>())
{
}

OptimizerOptionsOpaque::OptimizerOptionsOpaque(OptimizerOptionsOpaque const& other)
    : p_OptimizerOptionsImpl(std::make_unique<OptimizerOptionsOpaqueImpl>(*other.p_OptimizerOptionsImpl))
{
}

OptimizerOptionsOpaque::~OptimizerOptionsOpaque() = default;

OptimizerOptionsOpaque::OptimizerOptionsOpaque(bool reduceFp32ToFp16, bool debug, bool reduceFp32ToBf16,
                                               bool importEnabled, ModelOptions modelOptions, bool exportEnabled,
                                               bool debugToFile)
    : p_OptimizerOptionsImpl(std::make_unique<OptimizerOptionsOpaqueImpl>(reduceFp32ToFp16, debug, reduceFp32ToBf16,
                                                                          importEnabled, modelOptions,
                                                                          exportEnabled, debugToFile))
{
}

OptimizerOptionsOpaque::OptimizerOptionsOpaque(bool reduceFp32ToFp16, bool debug, bool reduceFp32ToBf16,
                                               ShapeInferenceMethod shapeInferenceMethod,
                                               bool importEnabled, ModelOptions modelOptions, bool exportEnabled,
                                               bool debugToFile, bool allowExpandedDims)
    : p_OptimizerOptionsImpl(std::make_unique<OptimizerOptionsOpaqueImpl>(reduceFp32ToFp16, debug, reduceFp32ToBf16,
                                                                          shapeInferenceMethod, importEnabled,
                                                                          modelOptions, exportEnabled,
                                                                          debugToFile, allowExpandedDims))
{
}

OptimizerOptionsOpaque::OptimizerOptionsOpaque(const OptimizerOptions& OptimizerStruct)
    : p_OptimizerOptionsImpl(std::make_unique<OptimizerOptionsOpaqueImpl>())
{
    p_OptimizerOptionsImpl->m_ImportEnabled = OptimizerStruct.m_ImportEnabled;
    p_OptimizerOptionsImpl->m_shapeInferenceMethod = OptimizerStruct.m_shapeInferenceMethod;
    p_OptimizerOptionsImpl->m_ModelOptions = OptimizerStruct.m_ModelOptions;
    p_OptimizerOptionsImpl->m_ProfilingEnabled = OptimizerStruct.m_ProfilingEnabled;
    p_OptimizerOptionsImpl->m_DebugToFile = OptimizerStruct.m_DebugToFile;
    p_OptimizerOptionsImpl->m_Debug = OptimizerStruct.m_Debug;
    p_OptimizerOptionsImpl->m_ReduceFp32ToFp16 = OptimizerStruct.m_ReduceFp32ToFp16;
    p_OptimizerOptionsImpl->m_ExportEnabled = OptimizerStruct.m_ExportEnabled;
    p_OptimizerOptionsImpl->m_AllowExpandedDims = OptimizerStruct.m_AllowExpandedDims;
    p_OptimizerOptionsImpl->m_ReduceFp32ToBf16 = OptimizerStruct.m_ReduceFp32ToBf16;
}

OptimizerOptionsOpaque& OptimizerOptionsOpaque::operator=(OptimizerOptionsOpaque other)
{
    p_OptimizerOptionsImpl->m_ImportEnabled = other.GetImportEnabled();
    p_OptimizerOptionsImpl->m_shapeInferenceMethod = other.GetShapeInferenceMethod();
    p_OptimizerOptionsImpl->m_ModelOptions = other.GetModelOptions();
    p_OptimizerOptionsImpl->m_ProfilingEnabled = other.GetProfilingEnabled();
    p_OptimizerOptionsImpl->m_DebugToFile = other.GetDebugToFileEnabled();
    p_OptimizerOptionsImpl->m_Debug = other.GetDebugEnabled();
    p_OptimizerOptionsImpl->m_ReduceFp32ToFp16 = other.GetReduceFp32ToFp16();
    p_OptimizerOptionsImpl->m_ExportEnabled = other.GetExportEnabled();
    p_OptimizerOptionsImpl->m_AllowExpandedDims = other.GetAllowExpandedDims();
    p_OptimizerOptionsImpl->m_ReduceFp32ToBf16 = other.GetReduceFp32ToBf16();
    return *this;
}

void OptimizerOptionsOpaque::SetImportEnabled(bool ImportState)
{
    p_OptimizerOptionsImpl->m_ImportEnabled = ImportState;
}

void OptimizerOptionsOpaque::SetExportEnabled(bool ExportState)
{
    p_OptimizerOptionsImpl->m_ExportEnabled = ExportState;
}

void OptimizerOptionsOpaque::SetProfilingEnabled(bool ProfilingState)
{
    p_OptimizerOptionsImpl->m_ProfilingEnabled = ProfilingState;
}

void OptimizerOptionsOpaque::SetDebugEnabled(bool DebugState)
{
    p_OptimizerOptionsImpl->m_Debug = DebugState;
}

void OptimizerOptionsOpaque::SetDebugToFileEnabled(bool DebugFileState)
{
    p_OptimizerOptionsImpl->m_DebugToFile = DebugFileState;
}

void OptimizerOptionsOpaque::SetReduceFp32ToFp16(bool ReduceFp32ToFp16State)
{
    p_OptimizerOptionsImpl->m_ReduceFp32ToFp16 = ReduceFp32ToFp16State;
}

void OptimizerOptionsOpaque::SetShapeInferenceMethod(armnn::ShapeInferenceMethod ShapeInferenceMethodType)
{
    p_OptimizerOptionsImpl->m_shapeInferenceMethod = ShapeInferenceMethodType;
}

void OptimizerOptionsOpaque::SetAllowExpandedDims(bool ExpandedDimsAllowed)
{
    p_OptimizerOptionsImpl->m_AllowExpandedDims = ExpandedDimsAllowed;
}

void OptimizerOptionsOpaque::AddModelOption(armnn::BackendOptions NewModelOption)
{
    p_OptimizerOptionsImpl->m_ModelOptions.push_back(NewModelOption);
}

bool OptimizerOptionsOpaque::GetProfilingEnabled() const
{
    return p_OptimizerOptionsImpl->m_ProfilingEnabled;
}

bool OptimizerOptionsOpaque::GetImportEnabled() const
{
    return p_OptimizerOptionsImpl->m_ImportEnabled;
}

bool OptimizerOptionsOpaque::GetExportEnabled() const
{
    return p_OptimizerOptionsImpl->m_ExportEnabled;
}

bool OptimizerOptionsOpaque::GetReduceFp32ToFp16() const
{
    return p_OptimizerOptionsImpl->m_ReduceFp32ToFp16;
}

bool OptimizerOptionsOpaque::GetReduceFp32ToBf16() const
{
    return p_OptimizerOptionsImpl->m_ReduceFp32ToBf16;
}

bool OptimizerOptionsOpaque::GetDebugEnabled() const
{
    return p_OptimizerOptionsImpl->m_Debug;
}

bool OptimizerOptionsOpaque::GetDebugToFileEnabled() const
{
    return p_OptimizerOptionsImpl->m_DebugToFile;
}

bool OptimizerOptionsOpaque::GetAllowExpandedDims() const
{
    return p_OptimizerOptionsImpl->m_AllowExpandedDims;
}

armnn::ModelOptions OptimizerOptionsOpaque::GetModelOptions() const
{
    return p_OptimizerOptionsImpl->m_ModelOptions;
}

armnn::ShapeInferenceMethod OptimizerOptionsOpaque::GetShapeInferenceMethod() const
{
    return p_OptimizerOptionsImpl->m_shapeInferenceMethod;
}

const std::string OptimizerOptionsOpaque::ToString() const
{
    std::stringstream stream;
    stream << "OptimizerOptions: \n";
    stream << "\tReduceFp32ToFp16: " << p_OptimizerOptionsImpl->m_ReduceFp32ToFp16 << "\n";
    stream << "\tReduceFp32ToBf16: " << p_OptimizerOptionsImpl->m_ReduceFp32ToBf16 << "\n";
    stream << "\tDebug: " << p_OptimizerOptionsImpl->m_Debug << "\n";
    stream << "\tDebug to file: " << p_OptimizerOptionsImpl->m_DebugToFile << "\n";
    stream << "\tShapeInferenceMethod: " <<
           (p_OptimizerOptionsImpl->m_shapeInferenceMethod == ShapeInferenceMethod::ValidateOnly ?
            "ValidateOnly" : "InferAndValidate") << "\n";
    stream << "\tImportEnabled: " << p_OptimizerOptionsImpl->m_ImportEnabled << "\n";
    stream << "\tExportEnabled: " << p_OptimizerOptionsImpl->m_ExportEnabled << "\n";
    stream << "\tProfilingEnabled: " << p_OptimizerOptionsImpl->m_ProfilingEnabled << "\n";
    stream << "\tAllowExpandedDims: " << p_OptimizerOptionsImpl->m_AllowExpandedDims << "\n";

    stream << "\tModelOptions: \n";
    for (auto optionsGroup : p_OptimizerOptionsImpl->m_ModelOptions)
    {
        for (size_t i = 0; i < optionsGroup.GetOptionCount(); i++)
        {
            const armnn::BackendOptions::BackendOption option = optionsGroup.GetOption(i);
            stream << "\t\tBackend: " << optionsGroup.GetBackendId() << "\n"
                   << "\t\t\tOption: " << option.GetName() << "\n"
                   << "\t\t\tValue: " << std::string(option.GetValue().ToString()) << "\n";
        }
    }

    return stream.str();
}

Status INetwork::PrintGraph()
{
    return pNetworkImpl->PrintGraph();
}

IConnectableLayer* INetwork::AddInputLayer(LayerBindingId id, const char* name)
{
    return pNetworkImpl->AddInputLayer(id, name);
}

IConnectableLayer* INetwork::AddArgMinMaxLayer(const ArgMinMaxDescriptor& desc,
                                               const char* name)
{
    return pNetworkImpl->AddArgMinMaxLayer(desc, name);
}

IConnectableLayer* INetwork::AddCastLayer(const char* name)
{
    return pNetworkImpl->AddCastLayer(name);
}

IConnectableLayer* INetwork::AddComparisonLayer(const ComparisonDescriptor& comparisonDescriptor,
                                                const char* name)
{
    return pNetworkImpl->AddComparisonLayer(comparisonDescriptor, name);
}

IConnectableLayer* INetwork::AddConcatLayer(const ConcatDescriptor& concatDescriptor,
                                            const char* name)
{
    return pNetworkImpl->AddConcatLayer(concatDescriptor, name);
}

IConnectableLayer* INetwork::AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor,
                                                   const char* name)
{
    return pNetworkImpl->AddConvolution2dLayer(convolution2dDescriptor, name);
}

IConnectableLayer* INetwork::AddConvolution3dLayer(const Convolution3dDescriptor& convolution3dDescriptor,
                                                   const char* name)
{
    return pNetworkImpl->AddConvolution3dLayer(convolution3dDescriptor, name);
}

IConnectableLayer* INetwork::AddDepthToSpaceLayer(const DepthToSpaceDescriptor& depthToSpaceDescriptor,
                                                  const char* name)
{
    return pNetworkImpl->AddDepthToSpaceLayer(depthToSpaceDescriptor, name);
}

IConnectableLayer* INetwork::AddDepthwiseConvolution2dLayer(
    const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
    const char* name)
{
    return pNetworkImpl->AddDepthwiseConvolution2dLayer(convolution2dDescriptor, name);
}

IConnectableLayer* INetwork::AddDequantizeLayer(const char* name)
{
    return pNetworkImpl->AddDequantizeLayer(name);
}

IConnectableLayer* INetwork::AddDetectionPostProcessLayer(
    const DetectionPostProcessDescriptor& descriptor,
    const ConstTensor& anchors,
    const char* name)
{
    return pNetworkImpl->AddDetectionPostProcessLayer(descriptor, anchors, name);
}

IConnectableLayer* INetwork::AddElementwiseBinaryLayer(const ElementwiseBinaryDescriptor& elementwiseBinaryDescriptor,
                                                       const char* name)
{
    return pNetworkImpl->AddElementwiseBinaryLayer(elementwiseBinaryDescriptor, name);
}

IConnectableLayer* INetwork::AddElementwiseUnaryLayer(const ElementwiseUnaryDescriptor& elementwiseUnaryDescriptor,
                                                      const char* name)
{
    return pNetworkImpl->AddElementwiseUnaryLayer(elementwiseUnaryDescriptor, name);
}

IConnectableLayer* INetwork::AddFillLayer(const FillDescriptor& fillDescriptor,
                                          const char* name)
{
    return pNetworkImpl->AddFillLayer(fillDescriptor, name);
}

IConnectableLayer* INetwork::AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor,
                                                    const char* name)
{
    return pNetworkImpl->AddFullyConnectedLayer(fullyConnectedDescriptor, name);
}

IConnectableLayer* INetwork::AddPermuteLayer(const PermuteDescriptor& permuteDescriptor,
                                             const char* name)
{
    return pNetworkImpl->AddPermuteLayer(permuteDescriptor, name);
}

IConnectableLayer* INetwork::AddBatchToSpaceNdLayer(const BatchToSpaceNdDescriptor& batchToSpaceNdDescriptor,
                                                    const char* name)
{
    return pNetworkImpl->AddBatchToSpaceNdLayer(batchToSpaceNdDescriptor, name);
}

IConnectableLayer* INetwork::AddPooling2dLayer(const Pooling2dDescriptor& pooling2dDescriptor,
                                               const char* name)
{
    return pNetworkImpl->AddPooling2dLayer(pooling2dDescriptor, name);
}

IConnectableLayer* INetwork::AddPooling3dLayer(const Pooling3dDescriptor& pooling3dDescriptor,
                                               const char* name)
{
    return pNetworkImpl->AddPooling3dLayer(pooling3dDescriptor, name);
}

IConnectableLayer* INetwork::AddPrecompiledLayer(const PreCompiledDescriptor& preCompiledDescriptor,
                                                 CompiledBlobPtr compiledBlobPtr,
                                                 const Optional<BackendId>& backend,
                                                 const char* name)
{
    return pNetworkImpl->AddPrecompiledLayer(preCompiledDescriptor, std::move(compiledBlobPtr), backend, name);
}

IConnectableLayer* INetwork::AddActivationLayer(const ActivationDescriptor& activationDescriptor,
                                                const char* name)
{
    return pNetworkImpl->AddActivationLayer(activationDescriptor, name);
}

IConnectableLayer* INetwork::AddNormalizationLayer(const NormalizationDescriptor& normalizationDescriptor,
                                                   const char* name)
{
    return pNetworkImpl->AddNormalizationLayer(normalizationDescriptor, name);
}

IConnectableLayer* INetwork::AddSliceLayer(const SliceDescriptor& sliceDescriptor, const char* name)
{
    return pNetworkImpl->AddSliceLayer(sliceDescriptor, name);
}

IConnectableLayer* INetwork::AddSoftmaxLayer(const SoftmaxDescriptor& softmaxDescriptor,
                                             const char* name)
{
    return pNetworkImpl->AddSoftmaxLayer(softmaxDescriptor, name);
}

IConnectableLayer* INetwork::AddSplitterLayer(const ViewsDescriptor& splitterDescriptor,
                                              const char* name)
{
    return pNetworkImpl->AddSplitterLayer(splitterDescriptor, name);
}

IConnectableLayer* INetwork::AddMergeLayer(const char* name)
{
    return pNetworkImpl->AddMergeLayer(name);
}

IConnectableLayer* INetwork::AddAdditionLayer(const char* name)
{
    ARMNN_NO_DEPRECATE_WARN_BEGIN
    return pNetworkImpl->AddAdditionLayer(name);
    ARMNN_NO_DEPRECATE_WARN_END
}

IConnectableLayer* INetwork::AddMultiplicationLayer(const char* name)
{
    ARMNN_NO_DEPRECATE_WARN_BEGIN
    return pNetworkImpl->AddMultiplicationLayer(name);
    ARMNN_NO_DEPRECATE_WARN_END
}

IConnectableLayer* INetwork::AddBatchNormalizationLayer(const BatchNormalizationDescriptor& desc,
                                                        const ConstTensor& mean,
                                                        const ConstTensor& variance,
                                                        const ConstTensor& beta,
                                                        const ConstTensor& gamma,
                                                        const char* name)
{
    return pNetworkImpl->AddBatchNormalizationLayer(desc, mean, variance, beta, gamma, name);
}

IConnectableLayer* INetwork::AddRankLayer(const char* name)
{
    return pNetworkImpl->AddRankLayer(name);
}

IConnectableLayer* INetwork::AddResizeLayer(const ResizeDescriptor& resizeDescriptor,
                                            const char* name)
{
    return pNetworkImpl->AddResizeLayer(resizeDescriptor, name);
}

IConnectableLayer* INetwork::AddReduceLayer(const ReduceDescriptor& reduceDescriptor,
                                            const char* name)
{
    return pNetworkImpl->AddReduceLayer(reduceDescriptor, name);
}

IConnectableLayer* INetwork::AddInstanceNormalizationLayer(const InstanceNormalizationDescriptor& desc,
                                                           const char* name)
{
    return pNetworkImpl->AddInstanceNormalizationLayer(desc, name);
}

IConnectableLayer* INetwork::AddL2NormalizationLayer(const L2NormalizationDescriptor& desc,
                                                     const char* name)
{
    return pNetworkImpl->AddL2NormalizationLayer(desc, name);
}

IConnectableLayer* INetwork::AddLogSoftmaxLayer(const LogSoftmaxDescriptor& logSoftmaxDescriptor,
                                                const char* name)
{
    return pNetworkImpl->AddLogSoftmaxLayer(logSoftmaxDescriptor, name);
}

IConnectableLayer* INetwork::AddConstantLayer(const ConstTensor& input,
                                              const char* name)
{
    return pNetworkImpl->AddConstantLayer(input, name);
}

IConnectableLayer* INetwork::AddReshapeLayer(const ReshapeDescriptor& reshapeDescriptor,
                                             const char* name)
{
    return pNetworkImpl->AddReshapeLayer(reshapeDescriptor, name);
}

IConnectableLayer* INetwork::AddSpaceToBatchNdLayer(const SpaceToBatchNdDescriptor& spaceToBatchNdDescriptor,
                                                    const char* name)
{
    return pNetworkImpl->AddSpaceToBatchNdLayer(spaceToBatchNdDescriptor, name);
}

IConnectableLayer* INetwork::AddSpaceToDepthLayer(const SpaceToDepthDescriptor& spaceToDepthDescriptor,
                                                  const char* name)
{
    return pNetworkImpl->AddSpaceToDepthLayer(spaceToDepthDescriptor, name);
}

IConnectableLayer* INetwork::AddFloorLayer(const char* name)
{
    return pNetworkImpl->AddFloorLayer(name);
}

IConnectableLayer* INetwork::AddOutputLayer(LayerBindingId id, const char* name)
{
    return pNetworkImpl->AddOutputLayer(id, name);
}

IConnectableLayer* INetwork::AddLstmLayer(const LstmDescriptor& descriptor,
                                          const LstmInputParams& params,
                                          const char* name)
{
    return pNetworkImpl->AddLstmLayer(descriptor, params, name);
}

IConnectableLayer* INetwork::AddDivisionLayer(const char* name)
{
    ARMNN_NO_DEPRECATE_WARN_BEGIN
    return pNetworkImpl->AddDivisionLayer(name);
    ARMNN_NO_DEPRECATE_WARN_END
}

IConnectableLayer* INetwork::AddSubtractionLayer(const char* name)
{
    ARMNN_NO_DEPRECATE_WARN_BEGIN
    return pNetworkImpl->AddSubtractionLayer(name);
    ARMNN_NO_DEPRECATE_WARN_END
}

IConnectableLayer* INetwork::AddMaximumLayer(const char* name)
{
    ARMNN_NO_DEPRECATE_WARN_BEGIN
    return pNetworkImpl->AddMaximumLayer(name);
    ARMNN_NO_DEPRECATE_WARN_END
}

IConnectableLayer* INetwork::AddMeanLayer(const MeanDescriptor& meanDescriptor, const char* name)
{
    return pNetworkImpl->AddMeanLayer(meanDescriptor, name);
}

IConnectableLayer* INetwork::AddPadLayer(const PadDescriptor& padDescriptor,
                                         const char* name)
{
    return pNetworkImpl->AddPadLayer(padDescriptor, name);
}

IConnectableLayer* INetwork::AddQuantizeLayer(const char* name)
{
    return pNetworkImpl->AddQuantizeLayer(name);
}

IConnectableLayer* INetwork::AddStridedSliceLayer(const StridedSliceDescriptor& stridedSliceDescriptor,
                                                  const char* name)
{
    return pNetworkImpl->AddStridedSliceLayer(stridedSliceDescriptor, name);
}

IConnectableLayer* INetwork::AddMinimumLayer(const char* name)
{
    ARMNN_NO_DEPRECATE_WARN_BEGIN
    return pNetworkImpl->AddMinimumLayer(name);
    ARMNN_NO_DEPRECATE_WARN_END
}

IConnectableLayer* INetwork::AddGatherLayer(const GatherDescriptor& descriptor,
                                            const char* name)
{
    return pNetworkImpl->AddGatherLayer(descriptor, name);
}

IConnectableLayer* INetwork::AddGatherNdLayer(const char* name)
{
    return pNetworkImpl->AddGatherNdLayer(name);
}

IConnectableLayer* INetwork::AddSwitchLayer(const char* name)
{
    return pNetworkImpl->AddSwitchLayer(name);
}

IConnectableLayer* INetwork::AddPreluLayer(const char* name)
{
    return pNetworkImpl->AddPreluLayer(name);
}

IConnectableLayer* INetwork::AddTransposeConvolution2dLayer(const TransposeConvolution2dDescriptor& descriptor,
                                                            const ConstTensor& weights,
                                                            const Optional<ConstTensor>& biases,
                                                            const char* name)
{
    return pNetworkImpl->AddTransposeConvolution2dLayer(descriptor, weights, biases, name);
}

IConnectableLayer* INetwork::AddTransposeLayer(const TransposeDescriptor& transposeDescriptor,
                                               const char* name)
{
    return pNetworkImpl->AddTransposeLayer(transposeDescriptor, name);
}

IConnectableLayer* INetwork::AddShapeLayer(const char* name)
{
    return pNetworkImpl->AddShapeLayer(name);
}

IConnectableLayer* INetwork::AddStackLayer(const StackDescriptor& descriptor,
                                           const char* name)
{
    return pNetworkImpl->AddStackLayer(descriptor, name);
}

IConnectableLayer* INetwork::AddStandInLayer(const StandInDescriptor& descriptor,
                                             const char* name)
{
    return pNetworkImpl->AddStandInLayer(descriptor, name);
}

IConnectableLayer* INetwork::AddQuantizedLstmLayer(const QuantizedLstmInputParams& params,
                                                   const char* name)
{
    return pNetworkImpl->AddQuantizedLstmLayer(params, name);
}

IConnectableLayer* INetwork::AddQLstmLayer(const QLstmDescriptor& descriptor,
                                           const LstmInputParams& params,
                                           const char* name)
{
    return pNetworkImpl->AddQLstmLayer(descriptor, params, name);
}

IConnectableLayer* INetwork::AddLogicalBinaryLayer(const LogicalBinaryDescriptor& descriptor,
                                                   const char* name)
{
    return pNetworkImpl->AddLogicalBinaryLayer(descriptor, name);
}

IConnectableLayer* INetwork::AddUnidirectionalSequenceLstmLayer(
    const UnidirectionalSequenceLstmDescriptor& descriptor,
    const LstmInputParams& params,
    const char* name)
{
    return pNetworkImpl->AddUnidirectionalSequenceLstmLayer(descriptor, params, name);
}

IConnectableLayer* INetwork::AddChannelShuffleLayer(const ChannelShuffleDescriptor& descriptor,
                                                    const char* name)
{
    return pNetworkImpl->AddChannelShuffleLayer(descriptor, name);
}

IConnectableLayer* INetwork::AddBatchMatMulLayer(const BatchMatMulDescriptor& descriptor,
                                                 const char* name)
{
    return pNetworkImpl->AddBatchMatMulLayer(descriptor, name);
}

void INetwork::ExecuteStrategy(IStrategy& strategy) const
{
    return pNetworkImpl->ExecuteStrategy(strategy);
}

armnn::INetwork* INetwork::CreateRaw(const NetworkOptions& networkOptions)
{
    return new INetwork(networkOptions);
}

armnn::INetworkPtr INetwork::Create(const NetworkOptions& networkOptions)
{
    return INetworkPtr(CreateRaw(networkOptions), &INetwork::Destroy);
}

void INetwork::Destroy(INetwork* network)
{
    delete network;
}

IOptimizedNetwork::IOptimizedNetwork(const IOptimizedNetwork& other, const ModelOptions& modelOptions)
    : pOptimizedNetworkImpl(new OptimizedNetworkImpl(*other.pOptimizedNetworkImpl.get(), modelOptions)) {}

IOptimizedNetwork::IOptimizedNetwork(std::unique_ptr<Graph> graph)
    : pOptimizedNetworkImpl(new OptimizedNetworkImpl(std::move(graph))) {}

IOptimizedNetwork::IOptimizedNetwork(std::unique_ptr<OptimizedNetworkImpl> impl)
    : pOptimizedNetworkImpl(std::move(impl)) {}

IOptimizedNetwork::IOptimizedNetwork(std::unique_ptr<Graph> graph, const ModelOptions& modelOptions)
    : pOptimizedNetworkImpl(new OptimizedNetworkImpl(std::move(graph), modelOptions)) {}

IOptimizedNetwork::~IOptimizedNetwork() = default;

void IOptimizedNetwork::Destroy(IOptimizedNetwork* network)
{
    delete network;
}

Status IOptimizedNetwork::PrintGraph()
{
    return pOptimizedNetworkImpl->PrintGraph();
}

Status IOptimizedNetwork::SerializeToDot(std::ostream& stream) const
{
    return pOptimizedNetworkImpl->SerializeToDot(stream);
}

const std::shared_ptr<IProfiler>& IOptimizedNetwork::GetProfiler() const
{
    return pOptimizedNetworkImpl->GetGraph().GetProfiler();
}

arm::pipe::ProfilingGuid IOptimizedNetwork::GetGuid() const
{
    return pOptimizedNetworkImpl->GetGuid();
}

size_t IOptimizedNetwork::GetNumInputs() const
{
    return pOptimizedNetworkImpl->GetNumInputs();
}

size_t IOptimizedNetwork::GetNumOutputs() const
{
    return pOptimizedNetworkImpl->GetNumOutputs();
}

Status OptimizedNetworkImpl::PrintGraph()
{
    m_Graph->Print();
    return Status::Success;
}

Status OptimizedNetworkImpl::SerializeToDot(std::ostream& stream) const
{
    return m_Graph->SerializeToDot(stream);
}

size_t OptimizedNetworkImpl::GetNumInputs() const
{
    return m_Graph->GetNumInputs();
}

size_t OptimizedNetworkImpl::GetNumOutputs() const
{
    return m_Graph->GetNumOutputs();
}

void ReportError(const std::string& errorMessage,
                 Optional<std::vector<std::string>&> errorMessages)
{
    std::stringstream fullErrorMessage;
    fullErrorMessage << "ERROR: " << errorMessage;
    ARMNN_LOG(warning) << fullErrorMessage.str();
    if (errorMessages)
    {
        errorMessages.value().push_back(fullErrorMessage.str());
    }
}

void ReportWarning(const std::string& warningMessage,
                   Optional<std::vector<std::string>&> warningMessages)
{
    std::stringstream fullWarningMessage;
    fullWarningMessage << "WARNING: " << warningMessage;
    ARMNN_LOG(warning) << fullWarningMessage.str();
    if (warningMessages)
    {
        warningMessages.value().push_back(fullWarningMessage.str());
    }
}

OptimizationResult ReturnWithError(OptimizationResult res,
                                   const Layer* layer,
                                   const BackendSettings& backendSettings,
                                   Optional<std::vector<std::string>&> errMessages)
{
    std::stringstream failureMsg;
    failureMsg << "Layer of type " << GetLayerTypeAsCString(layer->GetType())
               << " is not supported on any preferred backend " << backendSettings.m_PreferredBackends;
    ReportError(failureMsg.str(), errMessages);

    res.m_Error = true;
    return res;
}

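// Checks that every quantized (QAsymmU8) output of the layer has a valid quantization scale set.
// A missing scale is reported as an error; Softmax outputs with incorrect quantization parameters
// are corrected in place to scale 1/256 (0.00390625) and offset 0, with a warning.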
bool CheckScaleSetOnQuantizedType(Layer* layer, Optional<std::vector<std::string>&> errMessages)
{
    bool noErrors = true;
    unsigned int numOutputs = layer->GetNumOutputSlots();
    for (unsigned int i = 0; i < numOutputs; i++) {
        OutputSlot& outputSlot = layer->GetOutputSlot(i);
        TensorInfo info = outputSlot.GetTensorInfo();
        if (DataType::QAsymmU8 == info.GetDataType())
        {
            if (0.f == info.GetQuantizationScale())
            {
                noErrors = false;
                std::stringstream ss;
                ss << "output " << i << " of layer " << GetLayerTypeAsCString(layer->GetType())
                   << " (" << layer->GetNameStr() << ") is of type"
                   << " Quantized 8 bit but its scale parameter has not been set";
                ReportError(ss.str(), errMessages);
            }
            // Softmax under QAsymmU8 must always have scale (1.0f/256.0f) and offset 0
            if ((info.GetQuantizationScale() != (1.0f / 256.0f) ||
                 info.GetQuantizationOffset() != 0) &&
                 layer->GetType() == armnn::LayerType::Softmax)
            {
                std::stringstream ss;
                ss << "Quantization parameters for Softmax layer (Scale: " <<
                   info.GetQuantizationScale() << " and Offset: " << info.GetQuantizationOffset() <<
                   ") are incorrect and have been updated to Scale: 0.00390625 and Offset: 0";
                ARMNN_LOG(warning) << ss.str();
                info.SetQuantizationScale((1.0f / 256.0f));
                info.SetQuantizationOffset(0);
                outputSlot.SetTensorInfo(info);
            }
        }
    }
    return noErrors;
}

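// Attempts to assign the given backend to a single layer. If the layer is rejected and it operates on
// FP16 data, an FP32 fallback is tried: constant FP16 inputs are converted to FP32 and Convert layers
// are inserted either side of the layer. Returns a warning-only result when the backend is rejected,
// so the caller can move on to the next preferred backend.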
OptimizationResult AttemptBackendAssignment(BackendSettings& backendSettings,
                                            Graph& graph,
                                            Layer* layer,
                                            BackendId backend,
                                            DataType dataTypeIn,
                                            DataType dataTypeOut,
                                            const std::vector<BackendId>& availablePreferredBackends,
                                            std::string& reasonIfUnsupported,
                                            Optional<std::vector<std::string>&> errMessages)
{
    OptimizationResult result;

    // Helper lambda to compose a meaningful error message before returning with error
    auto ReturnError = [&](const Layer* layer)
    {
        return ReturnWithError(result, layer, backendSettings, errMessages);
    };

    // Need to set the compute device on the layer
    // before we can check if it is supported.
    layer->SetBackendId(backend);

    // To run FP16 operations on CpuAcc we need at least v8.2 architecture. If the available architecture
    // is older than v8.2, we can check if the operator is supported by changing operator inputs & outputs
    // to be FP32 and inserting convert layers around the FP32 operator.
    bool isLayerSupported = IWorkloadFactory::IsLayerSupported(*layer, EmptyOptional(), reasonIfUnsupported);
    std::string checkStr = "This CPU architecture does not support F16 data type, you need v8.2 or above";
    if (!isLayerSupported ||
        reasonIfUnsupported.find(checkStr) != std::string::npos)
    {
        if (dataTypeIn == DataType::Float16 || dataTypeOut == DataType::Float16)
        {
            if (IWorkloadFactory::IsLayerSupported(*layer, DataType::Float32, reasonIfUnsupported)
                && layer->GetType() != LayerType::ConvertFp32ToFp16
                && layer->GetType() != LayerType::ConvertFp16ToFp32)
            {
                auto ConstantLayerFromFp16ToFp32 = [](Layer& layer)
                {
                    if (layer.GetType() == LayerType::Constant)
                    {
                        ConstantLayer* constantLayer = PolymorphicDowncast<ConstantLayer*>(&layer);

                        auto& info = constantLayer->m_LayerOutput->GetTensorInfo();

                        if (info.GetDataType() == DataType::Float16)
                        {
                            std::vector<float> newValues(info.GetNumElements());

                            armnnUtils::FloatingPointConverter::ConvertFloat16To32(
                                constantLayer->m_LayerOutput->GetConstTensor<Half>(),
                                info.GetNumElements(),
                                newValues.data());

                            TensorInfo newInfo(info);
                            newInfo.SetDataType(DataType::Float32);
                            ConstTensor newInput(newInfo, newValues);
                            constantLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput));

                            layer.GetOutputSlot(0).SetTensorInfo(newInfo);
                        }
                    }
                };

                bool checkType = false;

                for (auto& inputSlot : layer->GetInputSlots())
                {
                    auto connectedOutputSlot = inputSlot.GetConnectedOutputSlot();
                    if (connectedOutputSlot->GetOwningLayer().GetType() == LayerType::Constant)
                    {
                        if (connectedOutputSlot->GetNumConnections() == 1)
                        {
                            checkType = true;
                            ConstantLayerFromFp16ToFp32(connectedOutputSlot->GetOwningLayer());
                        }
                    }
                }

                // Insert FP16 -> FP32 conversion layer before current layer
                std::vector<ConvertFp16ToFp32Layer*> convertFp16ToFp32Layers;
                if (dataTypeIn == DataType::Float16)
                {
                    convertFp16ToFp32Layers =
                        InsertConvertFp16ToFp32LayersBefore(graph, *layer, checkType);
                }

                // Insert FP32 -> FP16 conversion layer after current layer
                std::vector<ConvertFp32ToFp16Layer*> convertFp32ToFp16Layers;
                if (dataTypeOut == DataType::Float16)
                {
                    convertFp32ToFp16Layers =
                        InsertConvertFp32ToFp16LayersAfter(graph, *layer);
                }

                // Assign a supported backend to the newly introduced conversion layers
                auto AssignFirstSupportedBackend = [&](Layer* layer, BackendId preferredBackend)
                {
                    bool supportedBackendFound = false;
                    std::string reasonIfUnsupported;

                    // Try preferred backend first
                    layer->SetBackendId(preferredBackend);
                    if (IWorkloadFactory::IsLayerSupported(*layer,
                                                           EmptyOptional(),
                                                           reasonIfUnsupported))
                    {
                        supportedBackendFound = true;
                    }
                    else
                    {
                        for (const auto& backend : availablePreferredBackends)
                        {
                            // Skip preferred backend (we already determined that it is not supported)
                            if (backend == preferredBackend)
                            {
                                continue;
                            }

                            layer->SetBackendId(backend);
                            if (IWorkloadFactory::IsLayerSupported(*layer,
                                                                   EmptyOptional(),
                                                                   reasonIfUnsupported))
                            {
                                supportedBackendFound = true;
                                break;
                            }
                        }
                    }

                    return supportedBackendFound;
                };

                for (ConvertFp16ToFp32Layer* convertLayer : convertFp16ToFp32Layers)
                {
                    if (!AssignFirstSupportedBackend(convertLayer, backend))
                    {
                        return ReturnError(convertLayer);
                    }
                }

                for (ConvertFp32ToFp16Layer* convertLayer : convertFp32ToFp16Layers)
                {
                    if (!AssignFirstSupportedBackend(convertLayer, backend))
                    {
                        return ReturnError(convertLayer);
                    }
                }

                return result;
            }
        }

        std::stringstream warningMsg;
        warningMsg << "Layer of type " << GetLayerTypeAsCString(layer->GetType())
                   << " is not supported on requested backend " << layer->GetBackendId().Get()
                   << " for input data type " << GetDataTypeName(dataTypeIn)
                   << " and output data type " << GetDataTypeName(dataTypeOut)
                   << " (reason: " << reasonIfUnsupported
                   << "), falling back to the next backend.";
        ReportWarning(warningMsg.str(), errMessages);

        return OptimizationResult(true, false);
    }
    else
    {
        return result;
    }
}

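// Returns the input and output data types of a layer, defaulting to Float32 when the layer
// has no input or output slots (e.g. Input/Output layers).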
inline std::vector<DataType> GetLayerInOutDatatype(const Layer* layer)
{
    DataType dataTypeIn = layer->GetNumInputSlots() == 0 ? DataType::Float32 :
                          layer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo().GetDataType();
    DataType dataTypeOut = layer->GetNumOutputSlots() == 0 ? DataType::Float32 :
                           layer->GetOutputSlot(0).GetTensorInfo().GetDataType();
    return {dataTypeIn, dataTypeOut};
}

// Refactored to take an IConnectableLayer* rather than a layer iterator, because Graph and
// SubgraphView use different iterator types.
void AssignBackendsIConnectable(OptimizedNetworkImpl* optNetObjPtr,
                                IConnectableLayer* it,
                                Optional<std::vector<std::string>&> errMessages,
                                OptimizationResult& result,
                                BackendSettings& backendSettings,
                                std::vector<BackendId>& availablePreferredBackends)
{
    auto ReturnError = [&](const Layer* layer)
    {
        return ReturnWithError(result, layer, backendSettings, errMessages);
    };

    auto layer = PolymorphicDowncast<Layer*>(it);

    if (layer->GetType() == LayerType::Input)
    {
        return;
    }

    std::vector<DataType> inOutDataType = GetLayerInOutDatatype(layer);

    std::string reasonIfUnsupported;
    bool found = false;
    if (!CheckScaleSetOnQuantizedType(layer, errMessages))
    {
        // Don't bomb immediately, find all the quantized outputs
        // which haven't had a scale set and report them all back.
        result.m_Error = true;
    }

    // First try to assign the layer to its hinted backend, if any
    if (layer->GetBackendHint().has_value() &&
        backendSettings.IsBackendSupported(layer->GetBackendHint().value()) &&
        AttemptBackendAssignment(backendSettings,
                                 optNetObjPtr->GetGraph(),
                                 layer,
                                 layer->GetBackendHint().value(),
                                 inOutDataType[0],
                                 inOutDataType[1],
                                 availablePreferredBackends,
                                 reasonIfUnsupported,
                                 errMessages).IsOk())
    {
        found = true;
        backendSettings.m_SelectedBackends.insert(layer->GetBackendHint().value());
    }
    else
    {
        // Try to assign the layer to the preferred list of backends
        for (const auto& backend : availablePreferredBackends)
        {
            if (layer->GetBackendHint().has_value() &&
                layer->GetBackendHint().value() == backend)
            {
                continue; // Don't re-test the backend hint
            }

            OptimizationResult res = AttemptBackendAssignment(backendSettings,
                                                              optNetObjPtr->GetGraph(),
                                                              layer,
                                                              backend,
                                                              inOutDataType[0],
                                                              inOutDataType[1],
                                                              availablePreferredBackends,
                                                              reasonIfUnsupported,
                                                              errMessages);

            if (res.IsOk())
            {
                found = true;
                backendSettings.m_SelectedBackends.insert(backend);
                break;
            }
            else if (res.IsError())
            {
                result = res; // Cannot continue.
                // Note: we don't need to log the error as it would already
                // be logged in AttemptBackendAssignment().
            }
            else
            {
                ARMNN_ASSERT_MSG(res.IsWarningOnly(), "OptimizationResult in unexpected state.");
            }
        }
    }

    // If the layer is unsupported by any backend, log the failure and record the error.
    if (!found)
    {
        // NOTE: if the layer is not an operation queue type AND we have not got CpuRef as a
        // fallback we should set the compute device on the layer to CpuRef (these are not
        // available as accelerated operations, or are only available under certain
        // conditions, currently they comprise MemCopy, Constant, Permute)
        armnn::LayerType layerType = layer->GetType();
        if (!backendSettings.IsCpuRefUsed() && (layerType == armnn::LayerType::MemCopy ||
                                                layerType == armnn::LayerType::Constant ||
                                                layerType == armnn::LayerType::Permute))
        {
            BackendId cpuBackendId(armnn::Compute::CpuRef);
            layer->SetBackendId(cpuBackendId);
            backendSettings.m_SelectedBackends.insert(cpuBackendId);
        }
        else
        {
            result = ReturnError(layer);
        }
    }
}

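// Assigns a backend to every layer in the range [firstLayer, lastLayer). A layer is (re)visited when its
// backend is still "Unknown" or when it operates on Float16 data (which may require conversion layers).
// Input layers then inherit the backend of the layer they feed.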
OptimizationResult AssignBackends(OptimizedNetworkImpl* optNetObjPtr,
                                  BackendSettings& backendSettings,
                                  Graph::Iterator& firstLayer,
                                  Graph::Iterator& lastLayer,
                                  Optional<std::vector<std::string>&> errMessages)
{
    ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_AssignBackends");
    OptimizationResult result;

    auto availablePreferredBackends = backendSettings.GetAvailablePreferredBackends();
    if (availablePreferredBackends.empty())
    {
        std::stringstream failureMsg;
        failureMsg << "No preferred backends are available";
        ReportError(failureMsg.str(), errMessages);

        result.m_Error = true;
        return result;
    }

    for (auto it = firstLayer; it != lastLayer; ++it)
    {
        auto layer = PolymorphicDowncast<Layer*>(*it);
        std::vector<DataType> inOutDataType = GetLayerInOutDatatype(layer);

        // In AttemptBackendAssignment() we check:
        //     - if the input/output datatypes of the layer are float16
        //     - if the layer is supported with these datatypes
        // If the layer is not supported (failing on ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED() in clframework),
        // we attempt to insert conversion layers either side of the new fp32 layer.
        bool isFloat16 = false;
        for (auto type : inOutDataType)
        {
            if (type == DataType::Float16)
            {
                isFloat16 = true;
                break;
            }
        }

        if (layer->GetBackendId() == "Unknown" || isFloat16)
        {
            AssignBackendsIConnectable(optNetObjPtr,
                                       *it,
                                       errMessages,
                                       result,
                                       backendSettings,
                                       availablePreferredBackends);
        }
    }

    for (auto it = firstLayer; it != lastLayer; ++it)
    {
        auto layer = PolymorphicDowncast<Layer*>(*it);

        if (layer->GetType() == LayerType::Input)
        {
            BackendId connectedBackendId = layer->GetOutputSlot(0).GetConnection(0)->GetOwningLayer().GetBackendId();
            layer->SetBackendId(connectedBackendId);
        }
    }

    return result;
}

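// Overload of AssignBackends for SubgraphView iterator ranges; every layer in the range is (re)assigned.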
OptimizationResult AssignBackends(OptimizedNetworkImpl* optNetObjPtr,
                                  BackendSettings& backendSettings,
                                  SubgraphView::IConnectableLayerIterator& firstLayer,
                                  SubgraphView::IConnectableLayerIterator& lastLayer,
                                  Optional<std::vector<std::string>&> errMessages)
{
    ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_AssignBackends");
    OptimizationResult result;

    auto availablePreferredBackends = backendSettings.GetAvailablePreferredBackends();
    if (availablePreferredBackends.empty())
    {
        std::stringstream failureMsg;
        failureMsg << "No preferred backends are available";
        ReportError(failureMsg.str(), errMessages);

        result.m_Error = true;
        return result;
    }

    for (auto it = firstLayer; it != lastLayer; ++it)
    {
        AssignBackendsIConnectable(optNetObjPtr,
                                   *it,
                                   errMessages,
                                   result,
                                   backendSettings,
                                   availablePreferredBackends);
    }

    for (auto it = firstLayer; it != lastLayer; ++it)
    {
        auto layer = PolymorphicDowncast<Layer*>(*it);

        if (layer->GetType() == LayerType::Input)
        {
            BackendId connectedBackendId = layer->GetOutputSlot(0).GetConnection(0)->GetOwningLayer().GetBackendId();
            layer->SetBackendId(connectedBackendId);
        }
    }

    return result;
}

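// Convenience overload that assigns backends to all layers of a SubgraphView.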
OptimizationResult AssignBackends(OptimizedNetworkImpl* optNetObjPtr,
                                  BackendSettings& backendSettings,
                                  SubgraphView& subgraph,
                                  Optional<std::vector<std::string>&> errMessages)
{
    SubgraphView::IConnectableLayerIterator firstLayer = subgraph.beginIConnectable();
    SubgraphView::IConnectableLayerIterator lastLayer = subgraph.endIConnectable();
    return AssignBackends(optNetObjPtr,
                          backendSettings,
                          firstLayer,
                          lastLayer,
                          errMessages);
}

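// Instantiates each supported backend via the BackendRegistry and registers its tensor handle
// factories with the given registry.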
BackendsMap CreateSupportedBackends(TensorHandleFactoryRegistry& handleFactoryRegistry,
                                    BackendSettings& backendSettings)
{
    BackendsMap backends;
    auto const& backendRegistry = BackendRegistryInstance();
    for (auto&& selectedBackend : backendSettings.m_SupportedBackends)
    {
        auto backendFactory = backendRegistry.GetFactory(selectedBackend);
        auto backendObjPtr = backendFactory();
        ARMNN_ASSERT(backendObjPtr);

        backendObjPtr->RegisterTensorHandleFactories(handleFactoryRegistry);

        backends[backendObjPtr->GetId()] = std::move(backendObjPtr);
    }

    return backends;
}

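// Runs backend-specific optimizations: for each selected backend, selects the sub-graphs assigned to it,
// lets the backend optimize them, substitutes the optimized sub-graphs into the main graph, and
// re-assigns the layers of any failed sub-graph to the remaining backends.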
OptimizationResult ApplyBackendOptimizations(OptimizedNetworkImpl* optNetObjPtr,
                                             BackendSettings& backendSettings,
                                             BackendsMap& backends,
                                             const ModelOptions& modelOptions,
                                             Optional<std::vector<std::string>&> errMessages)
{
    ARMNN_ASSERT(optNetObjPtr);
    ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_ApplyBackendOptimizations");
    OptimizationResult result;

    // Get the optimized graph
    Graph& optGraph = optNetObjPtr->GetGraph();

    // Run backend-specific optimizations
    for (auto&& selectedBackend : backendSettings.m_SelectedBackends)
    {
        auto backendObjPtr = backends.find(selectedBackend)->second.get();
        ARMNN_ASSERT(backendObjPtr);

        if (selectedBackend == armnn::Compute::GpuAcc || selectedBackend == armnn::Compute::CpuAcc)
        {
            Optimizer::Pass(optGraph, MakeOptimizations(optimizations::PermuteDepthwiseConv2dWeights()));
            Optimizer::Pass(optGraph, MakeOptimizations(optimizations::FusePermuteIntoConstLayer()));
        }

        // Select sub-graphs based on backend
        SubgraphViewSelector::Subgraphs subgraphs =
            SubgraphViewSelector::SelectSubgraphs(optGraph,
                                                  // Select layers assigned to the requested backend
                                                  [&backendObjPtr](const Layer& layer)
                                                  {
                                                      return layer.GetType() != LayerType::Input &&
                                                             layer.GetType() != LayerType::Output &&
                                                             layer.GetBackendId() == backendObjPtr->GetId();
                                                  });
        if (subgraphs.empty())
        {
            // No sub-graphs found, try with next selected backend
            continue;
        }

        // Try to optimize each sub-graph
        for (auto& subgraph : subgraphs)
        {
            // Try to optimize the current sub-graph
            ARMNN_SCOPED_PROFILING_EVENT(backendObjPtr->GetId(), "Optimizer_OptimizeSubgraph");
            OptimizationViews optimizationViews = backendObjPtr->OptimizeSubgraphView(*subgraph, modelOptions);
            ARMNN_ASSERT(optimizationViews.Validate(*subgraph));

            // Optimization attempted, check the resulting optimized sub-graph
            for (auto& substitution : optimizationViews.GetSubstitutions())
            {
                // Sub-graph optimized, substitute the sub-graph with the new optimized one in the main graph
                SubgraphView& replacementSubgraph = substitution.m_ReplacementSubgraph;
                SubgraphView& substitutableSubgraph = substitution.m_SubstitutableSubgraph;
                optGraph.SubstituteSubgraph(substitutableSubgraph, replacementSubgraph);

                // Assign the current backend to the optimized sub-graph
                const SubgraphView::IConnectableLayers& subgraphLayers = replacementSubgraph.GetIConnectableLayers();
                std::for_each(subgraphLayers.begin(), subgraphLayers.end(), [&selectedBackend](IConnectableLayer* l)
                {
                    ARMNN_ASSERT(l);
                    PolymorphicDowncast<Layer*>(l)->SetBackendId(selectedBackend);
                });
            }

            if (!optimizationViews.GetFailedSubgraphs().empty())
            {
                std::stringstream warningMsg;
                warningMsg << "Some sub-graph(s) failed to be optimized on " << backendObjPtr->GetId() << " backend.";
                ReportWarning(warningMsg.str(), errMessages);

                // Failed to optimize the given sub-graph, re-assign the sub-graph layers to other available backends
                BackendSettings settingsCopy(backendSettings);
                if (!backendObjPtr->GetId().IsCpuRef())
                {
                    // Add the current backend to the list of backends to ignore
                    settingsCopy.m_IgnoredBackends.insert(backendObjPtr->GetId());
                }

                int count = 0;
                for (auto& failedSubgraph : optimizationViews.GetFailedSubgraphs())
                {
                    // An error occurred: the optimization was attempted but not performed, try different backends
                    std::stringstream subgraphMsg;
                    subgraphMsg << "Re-assigning backends to " << failedSubgraph.GetIConnectableLayers().size()
                                << " layers inside sub-graph " << count++;
                    ReportWarning(subgraphMsg.str(), errMessages);

                    OptimizationResult reassignmentResult = AssignBackends(optNetObjPtr,
                                                                           settingsCopy,
                                                                           *subgraph,
                                                                           errMessages);
                    if (reassignmentResult.m_Error)
                    {
                        // Failed to re-assign one of the remaining backends to each layer of the sub-graph
                        result.m_Error = true;
                        return result;
                    }
                }
            }
        }
    }

    return result;
}

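// Returns true when moving data between the two tensor handle factories requires a mem-copy;
// the copy is avoided when the source factory can export to memory that the destination factory can import.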
RequiresCopy(ITensorHandleFactory::FactoryId src,ITensorHandleFactory::FactoryId dst,TensorHandleFactoryRegistry & registry)1348 bool RequiresCopy(ITensorHandleFactory::FactoryId src,
1349 ITensorHandleFactory::FactoryId dst,
1350 TensorHandleFactoryRegistry& registry)
1351 {
1352 if (src != dst)
1353 {
1354 ITensorHandleFactory* srcFactory = registry.GetFactory(src);
1355 ITensorHandleFactory* dstFactory = registry.GetFactory(dst);
1356
1357 if (srcFactory && dstFactory &&
1358 (srcFactory->GetExportFlags() & dstFactory->GetImportFlags()) != 0)
1359 {
1360 return false;
1361 }
1362 return true;
1363 }
1364 return false;
1365 }
1366
1367 // Find the handle factory for the input layer which results in fewest required copies.
CalculateSlotOptionForInput(BackendsMap & backends,OutputSlot & slot,TensorHandleFactoryRegistry & registry,bool importEnabled)1368 ITensorHandleFactory::FactoryId CalculateSlotOptionForInput(BackendsMap& backends,
1369 OutputSlot& slot,
1370 TensorHandleFactoryRegistry& registry,
1371 bool importEnabled)
1372 {
1373 Layer& layer = slot.GetOwningLayer();
1374 ARMNN_ASSERT(layer.GetType() == LayerType::Input);
1375
1376 // Explicitly select the tensorhandle factory for InputLayer because the rules for it are slightly different. It
1377 // doesn't matter which backend it is assigned to because they all use the same implementation, which
1378 // requires Map/Unmap support. This means that, so long as the handle type supports map/unmap semantics, we can
1379 // select a factory with maximum compatibility with the layers connected to the InputLayer.
1380
1381 // First ensure the from backends can support the TensorHandeAPI
1382 auto frmBackend = backends.find(layer.GetBackendId());
1383 if (frmBackend == backends.end() ||
1384 !frmBackend->second->SupportsTensorAllocatorAPI())
1385 {
1386 return ITensorHandleFactory::LegacyFactoryId;
1387 }
1388
1389 // Go through all connections to the output slot and determine the TensorHandleFactory which results in the
1390 // fewest copies.
1391 std::map<ITensorHandleFactory::FactoryId, int> factoryScores;
1392 int topScore = 0;
1393 ITensorHandleFactory::FactoryId topChoice = ITensorHandleFactory::LegacyFactoryId;
1394
1395 for (auto&& connection : slot.GetConnections())
1396 {
1397
1398 const Layer& connectedLayer = connection->GetOwningLayer();
1399
1400 auto toBackend = backends.find(connectedLayer.GetBackendId());
1401 ARMNN_ASSERT_MSG(toBackend != backends.end(), "Backend id not found for the connected layer");
1402
1403 if (!toBackend->second.get()->SupportsTensorAllocatorAPI())
1404 {
1405 // The destination backend does not support the tensor allocator API, move to the next one
1406 continue;
1407 }
1408
1409 auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences();
1410 for (auto&& dst : dstPrefs)
1411 {
1412 // Input layers use the mem copy workload or import, so the selected factory must
1413 // support either the map/unmap API or Import API
1414 ITensorHandleFactory* factory = registry.GetFactory(dst);
1415 if (!factory || (importEnabled && factory->GetImportFlags() == 0)) // also skip preferences missing from the registry
1416 {
1417 continue;
1418 }
1419 else if (!importEnabled && !factory->SupportsMapUnmap())
1420 {
1421 continue;
1422 }
1423
1424 auto it = factoryScores.find(dst);
1425 if (it == factoryScores.end())
1426 {
1427 // Add new score to the table
1428 factoryScores[dst] = 0;
1429 if (topChoice == ITensorHandleFactory::LegacyFactoryId)
1430 {
1431 topChoice = dst;
1432 }
1433 }
1434 else
1435 {
1436 // Increase the score
1437 factoryScores[dst]++;
1438
1439 // Track the best option
1440 if (factoryScores[dst] > topScore)
1441 {
1442 topScore = factoryScores[dst];
1443 topChoice = dst;
1444 }
1445 }
1446 }
1447 }
1448
1449 return topChoice;
1450 }
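
// Editorial note: the loop above scores each candidate factory by how many consumers of the
// InputLayer prefer it, after filtering out factories that cannot satisfy the import (or
// map/unmap) requirement; the most-shared surviving factory wins.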
1451
1452 // Find the handle factory for the output layer which results in fewest required copies.
1453 ITensorHandleFactory::FactoryId CalculateSlotOptionForOutput(BackendsMap& backends,
1454 OutputSlot& slot,
1455 TensorHandleFactoryRegistry& registry)
1456 {
1457 IgnoreUnused(backends, slot, registry);
1458 return ITensorHandleFactory::DeferredFactoryId;
1459 }
1460
1461 // For all handle factories supported on the source backend, we wish to find the one which requires the fewest copies
1462 // when considering all connections.
1463 ITensorHandleFactory::FactoryId CalculateSlotOption(BackendsMap& backends,
1464 OutputSlot& outputSlot,
1465 TensorHandleFactoryRegistry& registry,
1466 bool exportEnabled)
1467 {
1468 // First ensure that the backend the layer is assigned to can support the tensor handle API.
1469 Layer& layer = outputSlot.GetOwningLayer();
1470 auto frmBackend = backends.find(layer.GetBackendId());
1471 if (frmBackend == backends.end() ||
1472 !frmBackend->second->SupportsTensorAllocatorAPI())
1473 {
1474 return ITensorHandleFactory::LegacyFactoryId;
1475 }
1476
1477 bool outputConnection = false;
1478 for (auto&& connection : outputSlot.GetConnections())
1479 {
1480 const Layer& connectedLayer = connection->GetOwningLayer();
1481 if (connectedLayer.GetType() == LayerType::Output)
1482 {
1483 outputConnection = true;
1484 }
1485 }
1486
1487 IBackendInternal* srcBackend = frmBackend->second.get();
1488 auto srcPrefs = srcBackend->GetHandleFactoryPreferences();
1489
1490 // Initialize the scores
1491 std::map<ITensorHandleFactory::FactoryId, int> factoryScores;
1492 for (auto&& pref : srcPrefs)
1493 {
1494 if (exportEnabled)
1495 {
1496 ITensorHandleFactory* factory = registry.GetFactory(pref);
1497 if (outputConnection)
1498 {
1499 // Check if this is a fallback connection
1500 bool fallbackConnection = false;
1501 for (auto&& inputSlot : layer.GetInputSlots())
1502 {
1503 if (inputSlot.GetConnectedOutputSlot()->GetOwningLayer().GetBackendId() != layer.GetBackendId())
1504 {
1505 fallbackConnection = true;
1506 }
1507 }
1508 if (fallbackConnection)
1509 {
1510 auto factoryCap = factory->GetCapabilities(&layer, &layer, CapabilityClass::FallbackImportDisabled);
1511 // Cannot use factory import if fallback import is not supported.
1512 if (!factoryCap.empty())
1513 {
1514 continue;
1515 }
1516 }
1517 else if (factory->GetExportFlags() == 0)
1518 {
1519 continue;
1520 }
1521 }
1522 if (!outputConnection)
1523 {
1524 auto factoryCap = factory->GetCapabilities(&layer, &layer, CapabilityClass::FallbackImportDisabled);
1525 // Cannot use factory import if fallback import is not supported.
1526 if (!factoryCap.empty())
1527 {
1528 continue;
1529 }
1530 }
1531
1532 }
1533 else
1534 {
1535 // Only consider factories that support map/unmap
1536 ITensorHandleFactory* factory = registry.GetFactory(pref);
1537 if (!factory->SupportsMapUnmap())
1538 {
1539 // The current tensor handle factory does not support the map/unmap strategy, move to the next one
1540 continue;
1541 }
1542 }
1543
1544
1545 auto it = factoryScores.find(pref);
1546 if (it == factoryScores.end())
1547 {
1548 // Add new score to the table
1549 factoryScores[pref] = 0;
1550 }
1551 }
1552
1553 // Score each handle factory based on how many times it requires copies on the slot connections
1554 for (auto&& connection : outputSlot.GetConnections())
1555 {
1556 const Layer& connectedLayer = connection->GetOwningLayer();
1557
1558 auto toBackend = backends.find(connectedLayer.GetBackendId());
1559 ARMNN_ASSERT_MSG(toBackend != backends.end(), "Backend id not found for the connected layer");
1560
1561 auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences();
1562 for (auto&& src : srcPrefs)
1563 {
1564 if (factoryScores.find(src) == factoryScores.end()) // Don't consider excluded factories
1565 {
1566 continue;
1567 }
1568
1569 for (auto&& dst : dstPrefs)
1570 {
1571 if (RequiresCopy(src, dst, registry))
1572 {
1573 // A copy would be required for this pairing, so count it against the factory (lower scores win below)
1574 factoryScores[src]++;
1575 break;
1576 }
1577 }
1578 }
1579 }
1580
1581 // Find the lowest score
1582 int minScore = std::numeric_limits<int>::max();
1583 for (auto it : factoryScores)
1584 {
1585 minScore = std::min(minScore, it.second);
1586 }
1587
1588 // Collect the factories matching the best (lowest) score
1589 std::vector<ITensorHandleFactory::FactoryId> optimalFactories;
1590 for (auto it : factoryScores)
1591 {
1592 if (it.second == minScore)
1593 {
1594 optimalFactories.push_back(it.first);
1595 }
1596 }
1597
1598 // For all compatible Factories matching the best score, find the preferred one for the current layer.
1599 for (auto&& srcPref : srcPrefs)
1600 {
1601 for (auto&& comp : optimalFactories)
1602 {
1603 if (comp == srcPref)
1604 {
1605 return comp;
1606 }
1607 }
1608 }
1609
1610 return ITensorHandleFactory::LegacyFactoryId;
1611 }
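
// --- Editorial sketch (not part of the original source) --------------------------------------
// The selection above reduces to: filter the source backend's preferences, count the copies each
// surviving factory would force, keep the lowest count, and break ties by the source backend's
// own preference order. The tie-break step, restated on its own (hypothetical helper):
[[maybe_unused]] static ITensorHandleFactory::FactoryId
ExamplePickByPreference(const std::vector<ITensorHandleFactory::FactoryId>& srcPrefs,
                        const std::vector<ITensorHandleFactory::FactoryId>& lowestScoring)
{
    for (const auto& pref : srcPrefs) // earlier entries are more preferred
    {
        if (std::find(lowestScoring.begin(), lowestScoring.end(), pref) != lowestScoring.end())
        {
            return pref;
        }
    }
    return ITensorHandleFactory::LegacyFactoryId; // nothing matched: fall back to the legacy factory
}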
1612
1613 EdgeStrategy CalculateEdgeStrategy(BackendsMap& backends,
1614 ITensorHandleFactory::FactoryId srcFactoryId,
1615 const Layer& layer,
1616 const Layer& connectedLayer,
1617 TensorHandleFactoryRegistry& registry,
1618 bool importEnabled)
1619 {
1620 auto toBackend = backends.find(connectedLayer.GetBackendId());
1621 ARMNN_ASSERT_MSG(toBackend != backends.end(), "Backend id not found for the connected layer");
1622
1623 auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences();
1624
1625 // Legacy API check for backward compatibility
1626 if (srcFactoryId == ITensorHandleFactory::LegacyFactoryId || dstPrefs.empty())
1627 {
1628 if (layer.GetBackendId() != connectedLayer.GetBackendId())
1629 {
1630 return EdgeStrategy::CopyToTarget;
1631 }
1632 else
1633 {
1634 return EdgeStrategy::DirectCompatibility;
1635 }
1636 }
1637
1638 // TensorHandleFactory API present, so perform more sophisticated strategies.
1639 // Dst Output layers don't require copy because they use import or map/unmap
1640 if (connectedLayer.GetType() == LayerType::Output)
1641 {
1642 return EdgeStrategy::DirectCompatibility;
1643 }
1644
1645 // Search for direct match in prefs
1646 for (auto&& pref : dstPrefs)
1647 {
1648 if (pref == srcFactoryId)
1649 {
1650 return EdgeStrategy::DirectCompatibility;
1651 }
1652 }
1653
1654 // Search for export/import options
1655 ITensorHandleFactory* srcFactory = registry.GetFactory(srcFactoryId);
1656 if (srcFactory->GetExportFlags() != 0 && importEnabled)
1657 {
1658 for (auto&& pref : dstPrefs)
1659 {
1660 ITensorHandleFactory* dstFactory = registry.GetFactory(pref);
1661
1662 // Handles cases when a destPref is not listed in TensorHandleFactoryRegistry
1663 if (!dstFactory) {
1664 continue;
1665 }
1666 if ((dstFactory->GetImportFlags() & srcFactory->GetExportFlags()) != 0)
1667 {
1668 auto srcCapability = srcFactory->GetCapabilities(&layer, &layer, CapabilityClass::PaddingRequired);
1669 auto dstCapability = dstFactory->GetCapabilities(&connectedLayer,
1670 &connectedLayer,
1671 CapabilityClass::PaddingRequired);
1672 auto srcFallback = srcFactory->GetCapabilities(&layer, &layer, CapabilityClass::FallbackImportDisabled);
1673 auto dstFallback = dstFactory->GetCapabilities(&connectedLayer,
1674 &connectedLayer,
1675 CapabilityClass::FallbackImportDisabled);
1676 // Export is zero-copy only when neither side requires padding and neither disables fallback import.
1677 if (srcCapability.empty() && dstCapability.empty() && srcFallback.empty() && dstFallback.empty())
1678 {
1679 return EdgeStrategy::ExportToTarget;
1680 }
1681 }
1682 }
1683 }
1684
1685 // Search for copy options via map/unmap
1686 if (srcFactory->SupportsMapUnmap())
1687 {
1688 for (auto&& pref : dstPrefs)
1689 {
1690 ITensorHandleFactory* dstFactory = registry.GetFactory(pref);
1691 if (dstFactory && dstFactory->SupportsMapUnmap())
1692 {
1693 return EdgeStrategy::CopyToTarget;
1694 }
1695 }
1696 }
1697
1698 return EdgeStrategy::Undefined;
1699 }
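
// Editorial note: the decision order implemented above, from cheapest to most defensive, is:
//   1. DirectCompatibility - the destination prefers the source's factory (or is an Output layer)
//   2. ExportToTarget      - import/export flags overlap and no padding/fallback capability blocks it
//   3. CopyToTarget        - both sides can at least map/unmap, so a memcpy workload is inserted
//   4. Undefined           - no strategy works; SelectTensorHandleStrategy reports an error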
1700
1701 // Select the TensorHandleFactories and the corresponding memory strategy
1702 OptimizationResult SelectTensorHandleStrategy(Graph& optGraph,
1703 BackendsMap& backends,
1704 TensorHandleFactoryRegistry& registry,
1705 bool importEnabled,
1706 bool exportEnabled,
1707 Optional<std::vector<std::string>&> errMessages)
1708 {
1709 ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_SelectTensorHandleStrategy");
1710 OptimizationResult result;
1711
1712 optGraph.ForEachLayer([&backends, &registry, &result, &errMessages, importEnabled, exportEnabled](Layer* layer)
1713 {
1714 ARMNN_ASSERT(layer);
1715
1716 // Let's make sure the backend is in our list of supported backends. Something went wrong during backend
1717 // assignment if this check fails
1718 ARMNN_ASSERT(backends.find(layer->GetBackendId()) != backends.end());
1719
1720 // Check each output separately
1721 for (unsigned int slotIdx = 0; slotIdx < layer->GetNumOutputSlots(); slotIdx++)
1722 {
1723 OutputSlot& outputSlot = layer->GetOutputSlot(slotIdx);
1724
1725 ITensorHandleFactory::FactoryId slotOption = ITensorHandleFactory::LegacyFactoryId;
1726
1727 // Calculate the factory to use which results in the fewest copies being made.
1728 switch(layer->GetType())
1729 {
1730 case LayerType::Input:
1731 slotOption = CalculateSlotOptionForInput(backends, outputSlot, registry, importEnabled);
1732 break;
1733 case LayerType::Output:
1734 slotOption = CalculateSlotOptionForOutput(backends, outputSlot, registry);
1735 break;
1736 default:
1737 slotOption = CalculateSlotOption(backends, outputSlot, registry, exportEnabled);
1738 break;
1739 }
1740 outputSlot.SetTensorHandleFactory(slotOption);
1741
1742 // Now determine the "best" edge strategy for each connection given the slotOption.
1743 unsigned int connectionIdx = 0;
1744 for (auto&& connection : outputSlot.GetConnections())
1745 {
1746 const Layer& connectedLayer = connection->GetOwningLayer();
1747
1748 EdgeStrategy strategy = CalculateEdgeStrategy(backends, slotOption, *layer, connectedLayer,
1749 registry, importEnabled);
1750
1751 if (strategy == EdgeStrategy::Undefined)
1752 {
1753 result.m_Error = true;
1754 if (errMessages)
1755 {
1756 errMessages.value().emplace_back("Could not find valid strategy required for compatibility"
1757 " between backends.");
1758 }
1759 return;
1760 }
1761
1762 outputSlot.SetEdgeStrategy(connectionIdx, strategy);
1763
1764 connectionIdx++;
1765 }
1766 }
1767 });
1768
1769 return result;
1770 }
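
// --- Editorial sketch (not part of the original source) --------------------------------------
// After the pass above, every output slot carries a factory id (and one edge strategy per
// connection). A quick audit sketch, assuming the GetTensorHandleFactoryId() accessor implied
// by the SetTensorHandleFactory() call above:
[[maybe_unused]] static void ExampleDumpFactoryChoices(Graph& graph)
{
    graph.ForEachLayer([](Layer* layer)
    {
        for (unsigned int i = 0; i < layer->GetNumOutputSlots(); ++i)
        {
            ARMNN_LOG(debug) << layer->GetNameStr() << " output " << i << " uses factory "
                             << layer->GetOutputSlot(i).GetTensorHandleFactoryId();
        }
    });
}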
1771
1772 // Forwarding function to remain backward compatible with legacy OptimizerOptions
1773 IOptimizedNetworkPtr Optimize(const Graph& inGraph,
1774 const std::vector<BackendId>& backendPreferences,
1775 const IDeviceSpec& deviceSpec,
1776 const OptimizerOptions& options,
1777 Optional<std::vector<std::string>&> messages)
1778 {
1779 return Optimize(inGraph,
1780 backendPreferences,
1781 deviceSpec,
1782 OptimizerOptionsOpaque(options),
1783 messages);
1784 }
1785
1786 IOptimizedNetworkPtr Optimize(const Graph& inGraph,
1787 const std::vector<BackendId>& backendPreferences,
1788 const IDeviceSpec& deviceSpec,
1789 const OptimizerOptionsOpaque& options,
1790 Optional<std::vector<std::string>&> messages)
1791 {
1792 ARMNN_LOG(debug) << options.ToString();
1793
1794 // Enable profiling
1795 auto profiler = inGraph.GetProfiler();
1796 ProfilerManager::GetInstance().RegisterProfiler(profiler.get());
1797 profiler->EnableProfiling(options.GetProfilingEnabled());
1798
1799 ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer");
1800 if (backendPreferences.empty())
1801 {
1802 throw InvalidArgumentException("Invoked Optimize with no backends specified");
1803 }
1804
1805 if (options.GetReduceFp32ToBf16())
1806 {
1807 throw InvalidArgumentException("BFloat16 optimization is currently ignored. In order to use Bf16 optimization "
1808 "Please use the FastMathEnabled backend option for CpuAcc or GpuAcc.");
1809 }
1810
1811 if (options.GetReduceFp32ToFp16() && options.GetReduceFp32ToBf16()) // note: unreachable today, any Bf16 request already threw above
1812 {
1813 throw InvalidArgumentException("BFloat16 and Float16 optimization cannot be enabled at the same time.");
1814 }
1815
1816 // Ensure TensorInfo is set on all output slots of ConstantLayers in the graph
1817 inGraph.VerifyConstantLayerSetTensorInfo();
1818
1819 std::unique_ptr<Graph> graph = std::make_unique<Graph>(inGraph);
1820
1821 // We need to pass on the information about whether import and export is enabled to the LoadNetwork phase.
1822 // The mechanism to do that is to add model options to the optimized network.
1823 armnn::BackendOptions importExport("Global",
1824 {{"ImportEnabled", options.GetImportEnabled()},
1825 {"ExportEnabled", options.GetExportEnabled()}});
1826 ModelOptions optimizedOptions(options.GetModelOptions());
1827 optimizedOptions.push_back(importExport);
1828
1829 auto optNet = IOptimizedNetworkPtr(new IOptimizedNetwork(std::move(graph), optimizedOptions),
1830 &IOptimizedNetwork::Destroy);
1831
1832 IOptimizedNetwork* optNetObjPtr = optNet.get();
1833
1834 // Get the optimized graph
1835 Graph& optGraph = optNetObjPtr->pOptimizedNetworkImpl->GetGraph();
1836
1837 if(options.GetShapeInferenceMethod() == ShapeInferenceMethod::InferAndValidate)
1838 {
1839 // Infer the tensor infos for all output slots. Throws an exception on failure
1840 optGraph.InferTensorInfos();
1841 }
1842
1843 // Perform AddBroadcastReshapeLayer optimisation
1844 using namespace optimizations;
1845 Optimizer::Pass(optGraph, MakeOptimizations(AddBroadcastReshapeLayer()));
1846
1847 if(options.GetShapeInferenceMethod() == ShapeInferenceMethod::ValidateOnly)
1848 {
1849 // Validate the tensor infos for all output slots. Throws an exception on failure
1850 optGraph.InferTensorInfos();
1851 }
1852
1853
1854 // Group Constant Layer optimizations together where possible.
1855 // This is important as:
1856 // FusePermuteIntoConstantLayer must happen before FoldPadIntoDepthwiseConvolution2d and
1857 // FuseBatchNormIntoDepthwiseConvolution2D.
1858 // ConvertConstDequantisationLayersToConstLayers must happen before FoldPadIntoConvolution2d
1859 Optimizer::Pass(optGraph, MakeOptimizations(FusePermuteIntoConstLayer(),
1860 ConvertConstDequantisationLayersToConstLayers()));
1861 // Perform optimisation passes
1862 Optimizer::Pass(optGraph, MakeOptimizations(SquashEqualPermuteSiblings(),
1863 SquashEqualTransposeSiblings(),
1864 SquashEqualReshapeSiblings(),
1865 OptimizeInversePermutes(),
1866 OptimizeInverseTransposes(),
1867 MovePermuteUp(),
1868 MoveTransposeUp(),
1869 PermuteAsReshape(),
1870 TransposeAsReshape(),
1871 OptimizeConsecutiveReshapes(),
1872 FoldPadIntoConvolution2d(),
1873 FoldPadIntoDepthwiseConvolution2d(),
1874 FoldPadIntoPooling2d(),
1875 PermuteAndBatchToSpaceAsDepthToSpace(),
1876 TransposeAndBatchToSpaceAsDepthToSpace(),
1877 FuseBatchNormIntoConvolution2DFloat32(),
1878 FuseBatchNormIntoConvolution2DFloat16(),
1879 FuseBatchNormIntoDepthwiseConvolution2DFloat32(),
1880 FuseBatchNormIntoDepthwiseConvolution2DFloat16()));
1881
1882
1883 if (options.GetReduceFp32ToFp16())
1884 {
1885 ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_ReduceFp32ToFp16");
1886 Optimizer::Pass(optGraph, MakeOptimizations(Fp32NetworkToFp16Converter()));
1887 Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsFloatToHalf()));
1888 }
1889
1890 // Initialize backend settings
1891 BackendSettings backendSettings(backendPreferences, deviceSpec);
1892 if (backendSettings.GetAvailablePreferredBackends().empty())
1893 {
1894 std::stringstream failureMsg;
1895 failureMsg << "None of the preferred backends " << backendPreferences
1896 << " are supported. Current platform provides " << backendSettings.m_SupportedBackends;
1897 ReportError(failureMsg.str(), messages);
1898 throw InvalidArgumentException(failureMsg.str());
1899 }
1900
1901 // Create a map to temporarily hold initialized backend objects
1902 TensorHandleFactoryRegistry tensorHandleFactoryRegistry;
1903 BackendsMap backends = CreateSupportedBackends(tensorHandleFactoryRegistry, backendSettings);
1904
1905 // Assign an available backend to each layer
1906 Graph::Iterator firstLayer = optGraph.begin();
1907 Graph::Iterator lastLayer = optGraph.end();
1908 OptimizationResult assignBackendsResult = AssignBackends(optNetObjPtr->pOptimizedNetworkImpl.get(),
1909 backendSettings,
1910 firstLayer,
1911 lastLayer,
1912 messages);
1913 if (assignBackendsResult.m_Error)
1914 {
1915 // Failed to assign a backend to each layer
1916 throw InvalidArgumentException("Failed to assign a backend to each layer");
1917 }
1918
1919 Optimizer::Pass(optGraph, MakeOptimizations(OptimizeInverseConversionsFp16(),
1920 OptimizeInverseConversionsFp32()));
1921
1922 // Apply the backend-specific optimizations
1923 OptimizationResult backendOptimizationResult = ApplyBackendOptimizations(optNetObjPtr->pOptimizedNetworkImpl.get(),
1924 backendSettings,
1925 backends,
1926 options.GetModelOptions(),
1927 messages);
1928 if (backendOptimizationResult.m_Error)
1929 {
1930 // Failed to apply the backend-specific optimizations
1931 throw InvalidArgumentException("Failed to apply the backend-specific optimizations");
1932 }
1933
1934 // Convert constants
1935 {
1936 ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_ConvertConstants");
1937 Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsFloatToHalf()));
1938 Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsHalfToFloat()));
1939 }
1940
1941 // This must occur after all topological changes to the graph and any redirection of variables
1942 // If the debug flag is set, then insert a DebugLayer after each layer
1943 // Doing this after applying the backend optimizations as they might have changed some layers
1944 if (options.GetDebugEnabled() && !options.GetDebugToFileEnabled())
1945 {
1946 Optimizer::Pass(optGraph, MakeOptimizations(InsertDebugLayer()));
1947 }
1948 else if (options.GetDebugToFileEnabled())
1949 {
1950 // Setup the output file path
1951 try
1952 {
1953 auto result = armnnUtils::Filesystem::CreateDirectory("/ArmNNIntermediateLayerOutputs");
1954 ARMNN_LOG(info) << "Intermediate tensors will be written to: " << result;
1955 Optimizer::Pass(optGraph, MakeOptimizations(InsertDebugToFileLayer()));
1956 }
1957 catch (const armnn::RuntimeException& e)
1958 {
1959 // If we cannot create the output directory then we'll issue a warning and continue.
1960 ARMNN_LOG(warning) << "Unable to print intermediate layer outputs : " << e.what();
1961 }
1962 }
1963
1964 // Calculate the compatibility strategies for tensor handles
1965 OptimizationResult strategyResult = SelectTensorHandleStrategy(optGraph,
1966 backends,
1967 tensorHandleFactoryRegistry,
1968 options.GetImportEnabled(),
1969 options.GetExportEnabled(),
1970 messages);
1971
1972 if (strategyResult.m_Error)
1973 {
1974 // Failed to select a valid tensor handle strategy; return a null network
1975 return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy);
1976 }
1977
1978 // Based on the tensor handle strategy determined above, insert copy layers where required.
1979 {
1980 ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_AddCompatibilityLayers");
1981 optGraph.AddCompatibilityLayers(backends, tensorHandleFactoryRegistry);
1982 }
1983
1984 return optNet;
1985 }
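
// --- Editorial sketch (not part of the original source) --------------------------------------
// Typical call sequence for the overloads above, using only API visible in this file. The
// backend ids and option values are illustrative.
[[maybe_unused]] static IOptimizedNetworkPtr ExampleOptimizeForCpu(const INetwork& network,
                                                                   const IDeviceSpec& deviceSpec)
{
    OptimizerOptionsOpaque options;    // defaults: no fp16 reduction, import/export disabled
    std::vector<std::string> messages; // collects warnings/errors emitted by the passes
    return Optimize(network,
                    {BackendId("CpuAcc"), BackendId("CpuRef")}, // preference order; CpuRef as fallback
                    deviceSpec,
                    options,
                    Optional<std::vector<std::string>&>(messages));
}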
1986
1987 // Forwarding function to remain backward compatible with legacy OptimizerOptions
1988 IOptimizedNetworkPtr Optimize(const INetwork& inNetwork,
1989 const std::vector<BackendId>& backendPreferences,
1990 const IDeviceSpec& deviceSpec,
1991 const OptimizerOptions& options,
1992 Optional<std::vector<std::string>&> messages)
1993 {
1994 return Optimize(inNetwork,
1995 backendPreferences,
1996 deviceSpec,
1997 OptimizerOptionsOpaque(options),
1998 messages);
1999 }
2000
2001 IOptimizedNetworkPtr Optimize(const INetwork& inNetwork,
2002 const std::vector<BackendId>& backendPreferences,
2003 const IDeviceSpec& deviceSpec,
2004 const OptimizerOptionsOpaque& options,
2005 Optional<std::vector<std::string>&> messages)
2006 {
2007 return Optimize(inNetwork.pNetworkImpl->GetGraph(),
2008 backendPreferences,
2009 deviceSpec,
2010 options,
2011 messages);
2012 }
2013
2014 bool NetworkImpl::GetShapeInferenceMethod()
2015 {
2016 bool shapeInferenceMethod = false;
2017
2018 ParseOptions(m_NetworkOptions, "ShapeInferenceMethod", [&](std::string name, const BackendOptions::Var& value)
2019 {
2020 if (name == "InferAndValidate")
2021 {
2022 shapeInferenceMethod |= value.AsBool();
2023 }
2024 });
2025 return shapeInferenceMethod;
2026 }
2027
2028 bool NetworkImpl::GetAllowExpandedDims()
2029 {
2030 bool allowExpandedDims = false;
2031
2032 ParseOptions(m_NetworkOptions, "AllowExpandedDims", [&](std::string name, const BackendOptions::Var& value)
2033 {
2034 if (name == "AllowExpandedDims")
2035 {
2036 allowExpandedDims |= value.AsBool();
2037 }
2038 });
2039 return allowExpandedDims;
2040 }
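
// --- Editorial sketch (not part of the original source) --------------------------------------
// How the two option readers above are fed: NetworkOptions is a vector of BackendOptions, and
// each reader matches on the option-set name plus the individual option name. Values shown are
// illustrative.
[[maybe_unused]] static NetworkOptions ExampleNetworkOptions()
{
    NetworkOptions options;
    options.emplace_back(BackendOptions("ShapeInferenceMethod", {{"InferAndValidate", true}}));
    options.emplace_back(BackendOptions("AllowExpandedDims", {{"AllowExpandedDims", true}}));
    return options; // pass to INetwork::Create() / the NetworkImpl constructor below
}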
2041
2042 NetworkImpl::NetworkImpl(const NetworkOptions& networkOptions)
2043 : m_NetworkOptions(networkOptions),
2044 m_Graph(std::make_unique<Graph>(GetShapeInferenceMethod(), GetAllowExpandedDims()))
2045 {}
2046
2047 NetworkImpl::~NetworkImpl()
2048 {
2049 }
2050
2051 Status NetworkImpl::PrintGraph()
2052 {
2053 m_Graph->Print();
2054 return Status::Success;
2055 }
2056
2057 IConnectableLayer* NetworkImpl::AddInputLayer(LayerBindingId id, const char* name)
2058 {
2059 return m_Graph->AddLayer<InputLayer>(id, name);
2060 }
2061
2062 IConnectableLayer* NetworkImpl::AddBatchToSpaceNdLayer(const BatchToSpaceNdDescriptor& batchToSpaceNdDescriptor,
2063 const char* name)
2064 {
2065 return m_Graph->AddLayer<BatchToSpaceNdLayer>(batchToSpaceNdDescriptor, name);
2066 }
2067
2068 IConnectableLayer* NetworkImpl::AddCastLayer(const char* name)
2069 {
2070 return m_Graph->AddLayer<CastLayer>(name);
2071 }
2072 IConnectableLayer* NetworkImpl::AddChannelShuffleLayer(const ChannelShuffleDescriptor& channelShuffleDescriptor,
2073 const char* name)
2074 {
2075 return m_Graph->AddLayer<ChannelShuffleLayer>(channelShuffleDescriptor, name);
2076 }
2077
2078 IConnectableLayer* NetworkImpl::AddComparisonLayer(const ComparisonDescriptor& comparisonDescriptor,
2079 const char* name)
2080 {
2081 return m_Graph->AddLayer<ComparisonLayer>(comparisonDescriptor, name);
2082 }
2083
2084 IConnectableLayer* NetworkImpl::AddElementwiseBinaryLayer(const ElementwiseBinaryDescriptor& elementwiseBinaryDesc,
2085 const char* name)
2086 {
2087 return m_Graph->AddLayer<ElementwiseBinaryLayer>(elementwiseBinaryDesc, name);
2088 }
2089
2090 IConnectableLayer* NetworkImpl::AddElementwiseUnaryLayer(const ElementwiseUnaryDescriptor& elementwiseUnaryDescriptor,
2091 const char* name)
2092 {
2093 return m_Graph->AddLayer<ElementwiseUnaryLayer>(elementwiseUnaryDescriptor, name);
2094 }
2095
2096 IConnectableLayer* NetworkImpl::AddFillLayer(const FillDescriptor& fillDescriptor,
2097 const char* name)
2098 {
2099 return m_Graph->AddLayer<FillLayer>(fillDescriptor, name);
2100 }
2101
2102 IConnectableLayer* NetworkImpl::AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor,
2103 const char* name)
2104 {
2105 return m_Graph->AddLayer<FullyConnectedLayer>(fullyConnectedDescriptor, name);
2106 }
2107
2108 IConnectableLayer* NetworkImpl::AddConcatLayer(const ConcatDescriptor& concatDescriptor,
2109 const char* name)
2110 {
2111 return m_Graph->AddLayer<ConcatLayer>(concatDescriptor, name);
2112 }
2113
2114 IConnectableLayer* NetworkImpl::AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor,
2115 const char* name)
2116 {
2117 return m_Graph->AddLayer<Convolution2dLayer>(convolution2dDescriptor, name);
2118 }
2119
2120 IConnectableLayer* NetworkImpl::AddConvertFp16ToFp32Layer(const char* name)
2121 {
2122 return m_Graph->AddLayer<ConvertFp16ToFp32Layer>(name);
2123 }
2124
2125 IConnectableLayer* NetworkImpl::AddConvertFp32ToFp16Layer(const char* name)
2126 {
2127 return m_Graph->AddLayer<ConvertFp32ToFp16Layer>(name);
2128 }
2129
2130 IConnectableLayer* NetworkImpl::AddConvolution3dLayer(const Convolution3dDescriptor& convolution3dDescriptor,
2131 const char* name)
2132 {
2133 return m_Graph->AddLayer<Convolution3dLayer>(convolution3dDescriptor, name);
2134 }
2135
2136 IConnectableLayer* NetworkImpl::AddDepthToSpaceLayer(const DepthToSpaceDescriptor& depthToSpaceDescriptor,
2137 const char* name)
2138 {
2139 return m_Graph->AddLayer<DepthToSpaceLayer>(depthToSpaceDescriptor, name);
2140 }
2141
2142 IConnectableLayer* NetworkImpl::AddDepthwiseConvolution2dLayer(
2143 const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
2144 const char* name)
2145 {
2146 return m_Graph->AddLayer<DepthwiseConvolution2dLayer>(convolution2dDescriptor, name);
2147 }
2148
2149 IConnectableLayer* NetworkImpl::AddDetectionPostProcessLayer(const armnn::DetectionPostProcessDescriptor& descriptor,
2150 const ConstTensor& anchors, const char* name)
2151 {
2152 const auto layer = m_Graph->AddLayer<DetectionPostProcessLayer>(descriptor, name);
2153
2154 layer->m_Anchors = std::make_shared<ScopedTensorHandle>(anchors);
2155
2156 return layer;
2157 }
2158
2159 IConnectableLayer* NetworkImpl::AddPermuteLayer(const PermuteDescriptor& permuteDescriptor,
2160 const char* name)
2161 {
2162 return m_Graph->AddLayer<PermuteLayer>(permuteDescriptor, name);
2163 }
2164
2165 IConnectableLayer* NetworkImpl::AddPooling2dLayer(const Pooling2dDescriptor& pooling2dDescriptor,
2166 const char* name)
2167 {
2168 return m_Graph->AddLayer<Pooling2dLayer>(pooling2dDescriptor, name);
2169 }
2170
2171 IConnectableLayer* NetworkImpl::AddPooling3dLayer(const Pooling3dDescriptor& pooling3dDescriptor,
2172 const char* name)
2173 {
2174 return m_Graph->AddLayer<Pooling3dLayer>(pooling3dDescriptor, name);
2175 }
2176
2177 IConnectableLayer* NetworkImpl::AddActivationLayer(const ActivationDescriptor& activationDescriptor,
2178 const char* name)
2179 {
2180 return m_Graph->AddLayer<ActivationLayer>(activationDescriptor, name);
2181 }
2182
2183 IConnectableLayer* NetworkImpl::AddArgMinMaxLayer(const ArgMinMaxDescriptor& argMinMaxDescriptor,
2184 const char* name)
2185 {
2186 return m_Graph->AddLayer<ArgMinMaxLayer>(argMinMaxDescriptor, name);
2187 }
2188
2189 IConnectableLayer* NetworkImpl::AddNormalizationLayer(const NormalizationDescriptor&
2190 normalizationDescriptor,
2191 const char* name)
2192 {
2193 return m_Graph->AddLayer<NormalizationLayer>(normalizationDescriptor, name);
2194 }
2195
2196 IConnectableLayer* NetworkImpl::AddSliceLayer(const SliceDescriptor& sliceDescriptor, const char* name)
2197 {
2198 return m_Graph->AddLayer<SliceLayer>(sliceDescriptor, name);
2199 }
2200
2201 IConnectableLayer* NetworkImpl::AddSoftmaxLayer(const SoftmaxDescriptor& softmaxDescriptor,
2202 const char* name)
2203 {
2204 return m_Graph->AddLayer<SoftmaxLayer>(softmaxDescriptor, name);
2205 }
2206
2207 IConnectableLayer* NetworkImpl::AddSplitterLayer(const ViewsDescriptor& splitterDescriptor,
2208 const char* name)
2209 {
2210 return m_Graph->AddLayer<SplitterLayer>(splitterDescriptor, name);
2211 }
2212
2213 IConnectableLayer* NetworkImpl::AddMaximumLayer(const char* name)
2214 {
2215 return m_Graph->AddLayer<MaximumLayer>(name);
2216 }
2217
2218 IConnectableLayer* NetworkImpl::AddMinimumLayer(const char* name)
2219 {
2220 return m_Graph->AddLayer<MinimumLayer>(name);
2221 }
2222
2223 IConnectableLayer* NetworkImpl::AddAdditionLayer(const char* name)
2224 {
2225 return m_Graph->AddLayer<AdditionLayer>(name);
2226 }
2227
2228 IConnectableLayer* NetworkImpl::AddMultiplicationLayer(const char* name)
2229 {
2230 return m_Graph->AddLayer<MultiplicationLayer>(name);
2231 }
2232
2233 IConnectableLayer* NetworkImpl::AddOutputLayer(LayerBindingId id, const char* name)
2234 {
2235 return m_Graph->AddLayer<OutputLayer>(id, name);
2236 }
2237
2238 IConnectableLayer* NetworkImpl::AddBatchNormalizationLayer(const BatchNormalizationDescriptor& desc,
2239 const ConstTensor& mean,
2240 const ConstTensor& variance,
2241 const ConstTensor& beta,
2242 const ConstTensor& gamma,
2243 const char* name)
2244 {
2245 const auto layer = m_Graph->AddLayer<BatchNormalizationLayer>(desc, name);
2246
2247 layer->m_Mean = std::make_shared<ScopedTensorHandle>(mean);
2248 layer->m_Variance = std::make_shared<ScopedTensorHandle>(variance);
2249 layer->m_Beta = std::make_shared<ScopedTensorHandle>(beta);
2250 layer->m_Gamma = std::make_shared<ScopedTensorHandle>(gamma);
2251
2252 return layer;
2253 }
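
// --- Editorial sketch (not part of the original source) --------------------------------------
// The four tensors above are per-channel vectors. A minimal way to build them; note the
// assumption that this ArmNN version requires constant inputs to be flagged via
// TensorInfo::SetConstant(). 'data' stands in for real statistics of length 'channels'.
[[maybe_unused]] static IConnectableLayer* ExampleAddBatchNorm(NetworkImpl& network,
                                                               unsigned int channels,
                                                               const float* data)
{
    TensorInfo info(TensorShape({channels}), DataType::Float32);
    info.SetConstant(true);
    ConstTensor mean(info, data);     // all four share shape/type here for brevity
    ConstTensor variance(info, data);
    ConstTensor beta(info, data);
    ConstTensor gamma(info, data);
    BatchNormalizationDescriptor desc;
    desc.m_Eps = 1e-5f;
    return network.AddBatchNormalizationLayer(desc, mean, variance, beta, gamma, "batchNorm");
}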
2254
2255 IConnectableLayer* NetworkImpl::AddRankLayer(const char* name)
2256 {
2257 return m_Graph->AddLayer<RankLayer>(name);
2258 }
2259
2260 IConnectableLayer* NetworkImpl::AddReduceLayer(const ReduceDescriptor& reduceDescriptor,
2261 const char* name)
2262 {
2263 return m_Graph->AddLayer<ReduceLayer>(reduceDescriptor, name);
2264 }
2265
2266 IConnectableLayer* NetworkImpl::AddResizeLayer(const ResizeDescriptor& resizeDescriptor, const char* name)
2267 {
2268 return m_Graph->AddLayer<ResizeLayer>(resizeDescriptor, name);
2269 }
2270
2271 IConnectableLayer* NetworkImpl::AddShapeLayer(const char* name)
2272 {
2273 return m_Graph->AddLayer<ShapeLayer>(name);
2274 }
2275
2276 IConnectableLayer* NetworkImpl::AddInstanceNormalizationLayer(const InstanceNormalizationDescriptor& desc,
2277 const char* name)
2278 {
2279 return m_Graph->AddLayer<InstanceNormalizationLayer>(desc, name);
2280 }
2281
2282 IConnectableLayer* NetworkImpl::AddL2NormalizationLayer(const L2NormalizationDescriptor& desc,
2283 const char* name)
2284 {
2285 return m_Graph->AddLayer<L2NormalizationLayer>(desc, name);
2286 }
2287
2288 IConnectableLayer* NetworkImpl::AddLogSoftmaxLayer(const LogSoftmaxDescriptor& desc,
2289 const char* name)
2290 {
2291 return m_Graph->AddLayer<LogSoftmaxLayer>(desc, name);
2292 }
2293
2294 IConnectableLayer* NetworkImpl::AddConstantLayer(const ConstTensor& input, const char* name)
2295 {
2296 auto layer = m_Graph->AddLayer<ConstantLayer>(name);
2297
2298 layer->m_LayerOutput = std::make_shared<ScopedTensorHandle>(input);
2299
2300 return layer;
2301 }
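
// --- Editorial sketch (not part of the original source) --------------------------------------
// ScopedTensorHandle copies the tensor memory above, so the caller's buffer only needs to
// outlive the AddConstantLayer call. Callers must still set the output TensorInfo themselves:
// VerifyConstantLayerSetTensorInfo() in Optimize() enforces this.
[[maybe_unused]] static IConnectableLayer* ExampleAddScalarConstant(NetworkImpl& network, float value)
{
    TensorInfo info(TensorShape({1}), DataType::Float32);
    info.SetConstant(true); // assumed requirement for ConstTensor in recent ArmNN releases
    ConstTensor scalar(info, &value);
    IConnectableLayer* constant = network.AddConstantLayer(scalar, "const");
    constant->GetOutputSlot(0).SetTensorInfo(info);
    return constant;
}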
2302
2303 IConnectableLayer* NetworkImpl::AddReshapeLayer(const ReshapeDescriptor& reshapeDescriptor,
2304 const char* name)
2305 {
2306 return m_Graph->AddLayer<ReshapeLayer>(reshapeDescriptor, name);
2307 }
2308
2309 IConnectableLayer* NetworkImpl::AddSpaceToBatchNdLayer(const SpaceToBatchNdDescriptor& spaceToBatchNdDescriptor,
2310 const char* name)
2311 {
2312 return m_Graph->AddLayer<SpaceToBatchNdLayer>(spaceToBatchNdDescriptor, name);
2313 }
2314
2315 IConnectableLayer* NetworkImpl::AddSpaceToDepthLayer(const SpaceToDepthDescriptor& spaceToDepthDescriptor,
2316 const char* name)
2317 {
2318 return m_Graph->AddLayer<SpaceToDepthLayer>(spaceToDepthDescriptor, name);
2319 }
2320
2321 IConnectableLayer* NetworkImpl::AddFloorLayer(const char* name)
2322 {
2323 return m_Graph->AddLayer<FloorLayer>(name);
2324 }
2325
2326 IConnectableLayer* NetworkImpl::AddLstmLayer(const LstmDescriptor& descriptor,
2327 const LstmInputParams& params,
2328 const char* name)
2329 {
2330 const auto layer = m_Graph->AddLayer<LstmLayer>(descriptor, name);
2331
2332 //Lstm Basic Parameters
2333 layer->m_BasicParameters.m_InputToForgetWeights =
2334 std::make_shared<ScopedTensorHandle>(*(params.m_InputToForgetWeights));
2335 layer->m_BasicParameters.m_InputToCellWeights =
2336 std::make_shared<ScopedTensorHandle>(*(params.m_InputToCellWeights));
2337 layer->m_BasicParameters.m_InputToOutputWeights =
2338 std::make_shared<ScopedTensorHandle>(*(params.m_InputToOutputWeights));
2339 layer->m_BasicParameters.m_RecurrentToForgetWeights =
2340 std::make_shared<ScopedTensorHandle>(*(params.m_RecurrentToForgetWeights));
2341 layer->m_BasicParameters.m_RecurrentToCellWeights =
2342 std::make_shared<ScopedTensorHandle>(*(params.m_RecurrentToCellWeights));
2343 layer->m_BasicParameters.m_RecurrentToOutputWeights =
2344 std::make_shared<ScopedTensorHandle>(*(params.m_RecurrentToOutputWeights));
2345 layer->m_BasicParameters.m_ForgetGateBias =
2346 std::make_shared<ScopedTensorHandle>(*(params.m_ForgetGateBias));
2347 layer->m_BasicParameters.m_CellBias =
2348 std::make_shared<ScopedTensorHandle>(*(params.m_CellBias));
2349 layer->m_BasicParameters.m_OutputGateBias =
2350 std::make_shared<ScopedTensorHandle>(*(params.m_OutputGateBias));
2351
2352 //Lstm Cifg parameters
2353 if(!descriptor.m_CifgEnabled)
2354 {
2355 if(params.m_InputToInputWeights == nullptr)
2356 {
2357 throw InvalidArgumentException("AddLstmLayer: Input To Input Weights cannot be NULL "
2358 "when CIFG is disabled.");
2359 }
2360 if(params.m_RecurrentToInputWeights == nullptr)
2361 {
2362 throw InvalidArgumentException(
2363 "AddLstmLayer: Recurrent To Input Weights cannot be NULL "
2364 "when CIFG is disabled.");
2365 }
2366 if(params.m_InputGateBias == nullptr)
2367 {
2368 throw InvalidArgumentException("AddLstmLayer: Input Gate Bias cannot be NULL "
2369 "when CIFG is disabled.");
2370 }
2371 layer->m_CifgParameters.m_InputToInputWeights =
2372 std::make_shared<ScopedTensorHandle>(*(params.m_InputToInputWeights));
2373 layer->m_CifgParameters.m_RecurrentToInputWeights =
2374 std::make_shared<ScopedTensorHandle>(*(params.m_RecurrentToInputWeights));
2375 layer->m_CifgParameters.m_InputGateBias =
2376 std::make_shared<ScopedTensorHandle>(*(params.m_InputGateBias));
2377 }
2378
2379 //Lstm projection parameters
2380 if(descriptor.m_ProjectionEnabled)
2381 {
2382 if(params.m_ProjectionWeights == nullptr)
2383 {
2384 throw InvalidArgumentException("AddLstmLayer: Projection Weights cannot be NULL "
2385 "when projection is enabled.");
2386 }
2387 layer->m_ProjectionParameters.m_ProjectionWeights =
2388 std::make_shared<ScopedTensorHandle>(*(params.m_ProjectionWeights));
2389 if(params.m_ProjectionBias != nullptr)
2390 {
2391 layer->m_ProjectionParameters.m_ProjectionBias =
2392 std::make_shared<ScopedTensorHandle>(*(params.m_ProjectionBias));
2393 }
2394 }
2395
2396 //Lstm Peephole params
2397 if(descriptor.m_PeepholeEnabled)
2398 {
2399 if(!descriptor.m_CifgEnabled)
2400 {
2401 if(params.m_CellToInputWeights == nullptr)
2402 {
2403 throw InvalidArgumentException("AddLstmLayer: Cell To Input Weights cannot be NULL "
2404 "when Peephole is enabled and CIFG disabled.");
2405 }
2406
2407 layer->m_PeepholeParameters.m_CellToInputWeights =
2408 std::make_shared<ScopedTensorHandle>(*(params.m_CellToInputWeights));
2409 }
2410
2411 if(params.m_CellToForgetWeights == nullptr)
2412 {
2413 throw InvalidArgumentException("AddLstmLayer: Cell To Forget Weights cannot be NULL "
2414 "when Peephole is enabled.");
2415 }
2416 if(params.m_CellToOutputWeights == nullptr)
2417 {
2418 throw InvalidArgumentException("AddLstmLayer: Cell To Output Weights cannot be NULL "
2419 "when Peephole is enabled.");
2420 }
2421
2422 layer->m_PeepholeParameters.m_CellToForgetWeights =
2423 std::make_shared<ScopedTensorHandle>(*(params.m_CellToForgetWeights));
2424 layer->m_PeepholeParameters.m_CellToOutputWeights =
2425 std::make_shared<ScopedTensorHandle>(*(params.m_CellToOutputWeights));
2426 }
2427
2428 //Lstm Layer Normalization params
2429 if(descriptor.m_LayerNormEnabled)
2430 {
2431 if(!descriptor.m_CifgEnabled)
2432 {
2433 if(params.m_InputLayerNormWeights == nullptr)
2434 {
2435 throw InvalidArgumentException("AddLstmLayer: Input layer normalization weights cannot be NULL "
2436 "when layer normalization is enabled and CIFG disabled.");
2437 }
2438 layer->m_LayerNormParameters.m_InputLayerNormWeights =
2439 std::make_shared<ScopedTensorHandle>(*(params.m_InputLayerNormWeights));
2440 }
2441
2442 if(params.m_ForgetLayerNormWeights == nullptr)
2443 {
2444 throw InvalidArgumentException("AddLstmLayer: Forget layer normalization weights cannot be NULL "
2445 "when layer normalization is enabled.");
2446 }
2447 if(params.m_CellLayerNormWeights == nullptr)
2448 {
2449 throw InvalidArgumentException("AddLstmLayer: Cell layer normalization weights cannot be NULL "
2450 "when layer normalization is enabled.");
2451 }
2452 if(params.m_OutputLayerNormWeights == nullptr)
2453 {
2454 throw InvalidArgumentException("AddLstmLayer: Output layer normalization weights cannot be NULL "
2455 "when layer normalization is enabled.");
2456 }
2457 layer->m_LayerNormParameters.m_ForgetLayerNormWeights =
2458 std::make_shared<ScopedTensorHandle>(*(params.m_ForgetLayerNormWeights));
2459 layer->m_LayerNormParameters.m_CellLayerNormWeights =
2460 std::make_shared<ScopedTensorHandle>(*(params.m_CellLayerNormWeights));
2461 layer->m_LayerNormParameters.m_OutputLayerNormWeights =
2462 std::make_shared<ScopedTensorHandle>(*(params.m_OutputLayerNormWeights));
2463 }
2464 return layer;
2465 }
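
// --- Editorial sketch (not part of the original source) --------------------------------------
// With CIFG enabled, the mandatory LstmInputParams members are exactly the nine tensors copied
// unconditionally above; every optional block is validated only when its feature is enabled.
// 'weights' and 'bias' stand in for properly shaped per-gate tensors.
[[maybe_unused]] static IConnectableLayer* ExampleAddCifgLstm(NetworkImpl& network,
                                                              const ConstTensor& weights,
                                                              const ConstTensor& bias)
{
    LstmDescriptor desc;
    desc.m_CifgEnabled = true; // no input gate, so the CIFG parameter block may be omitted

    LstmInputParams params;
    params.m_InputToForgetWeights     = &weights;
    params.m_InputToCellWeights       = &weights;
    params.m_InputToOutputWeights     = &weights;
    params.m_RecurrentToForgetWeights = &weights;
    params.m_RecurrentToCellWeights   = &weights;
    params.m_RecurrentToOutputWeights = &weights;
    params.m_ForgetGateBias           = &bias;
    params.m_CellBias                 = &bias;
    params.m_OutputGateBias           = &bias;

    return network.AddLstmLayer(desc, params, "lstm");
}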
2466
2467 IConnectableLayer* NetworkImpl::AddDivisionLayer(const char* name)
2468 {
2469 return m_Graph->AddLayer<DivisionLayer>(name);
2470 }
2471
2472 IConnectableLayer* NetworkImpl::AddSubtractionLayer(const char* name)
2473 {
2474 return m_Graph->AddLayer<SubtractionLayer>(name);
2475 }
2476
2477 IConnectableLayer* NetworkImpl::AddMeanLayer(const MeanDescriptor& meanDescriptor, const char* name)
2478 {
2479 return m_Graph->AddLayer<MeanLayer>(meanDescriptor, name);
2480 }
2481
2482 IConnectableLayer* NetworkImpl::AddPadLayer(const PadDescriptor& padDescriptor, const char* name)
2483 {
2484 return m_Graph->AddLayer<PadLayer>(padDescriptor, name);
2485 }
2486
2487 IConnectableLayer* NetworkImpl::AddQuantizeLayer(const char* name)
2488 {
2489 return m_Graph->AddLayer<QuantizeLayer>(name);
2490 }
2491
2492 IConnectableLayer* NetworkImpl::AddDequantizeLayer(const char* name)
2493 {
2494 return m_Graph->AddLayer<DequantizeLayer>(name);
2495 }
2496
2497 IConnectableLayer* NetworkImpl::AddStridedSliceLayer(const StridedSliceDescriptor& stridedSliceDescriptor,
2498 const char* name)
2499 {
2500 return m_Graph->AddLayer<StridedSliceLayer>(stridedSliceDescriptor, name);
2501 }
2502
2503 IConnectableLayer* NetworkImpl::AddGatherLayer(const GatherDescriptor& gatherDescriptor,
2504 const char* name)
2505 {
2506 return m_Graph->AddLayer<GatherLayer>(gatherDescriptor, name);
2507 }
2508
2509 IConnectableLayer* NetworkImpl::AddGatherNdLayer(const char* name)
2510 {
2511 return m_Graph->AddLayer<GatherNdLayer>(name);
2512 }
2513
2514 IConnectableLayer* NetworkImpl::AddMergeLayer(const char* name)
2515 {
2516 return m_Graph->AddLayer<MergeLayer>(name);
2517 }
2518
2519 IConnectableLayer* NetworkImpl::AddSwitchLayer(const char* name)
2520 {
2521 return m_Graph->AddLayer<SwitchLayer>(name);
2522 }
2523
2524 IConnectableLayer* NetworkImpl::AddPreluLayer(const char* name)
2525 {
2526 return m_Graph->AddLayer<PreluLayer>(name);
2527 }
2528
2529 IConnectableLayer* NetworkImpl::AddTransposeConvolution2dLayer(const TransposeConvolution2dDescriptor& descriptor,
2530 const ConstTensor& weights,
2531 const Optional<ConstTensor>& biases,
2532 const char* name)
2533 {
2534 if (descriptor.m_BiasEnabled && !biases.has_value())
2535 {
2536 throw InvalidArgumentException("AddTransposeConvolution2dLayer: Biases cannot be empty");
2537 }
2538
2539 const auto layer = m_Graph->AddLayer<TransposeConvolution2dLayer>(descriptor, name);
2540
2541 layer->m_Weight = std::make_shared<ScopedTensorHandle>(weights);
2542
2543 if (descriptor.m_BiasEnabled)
2544 {
2545 layer->m_Bias = std::make_shared<ScopedTensorHandle>(biases.value());
2546 }
2547
2548 return layer;
2549 }
2550
2551 IConnectableLayer* NetworkImpl::AddTransposeLayer(const TransposeDescriptor& transposeDescriptor,
2552 const char* name)
2553 {
2554 return m_Graph->AddLayer<TransposeLayer>(transposeDescriptor, name);
2555 }
2556
2557 IConnectableLayer* NetworkImpl::AddStackLayer(const StackDescriptor& stackDescriptor,
2558 const char* name)
2559 {
2560 return m_Graph->AddLayer<StackLayer>(stackDescriptor, name);
2561 }
2562
2563
2564 IConnectableLayer* NetworkImpl::AddStandInLayer(const StandInDescriptor& desc,
2565 const char* name)
2566 {
2567 return m_Graph->AddLayer<StandInLayer>(desc, name);
2568 }
2569
2570 IConnectableLayer* NetworkImpl::AddQuantizedLstmLayer(const QuantizedLstmInputParams& params,
2571 const char* name)
2572 {
2573 const auto layer = m_Graph->AddLayer<QuantizedLstmLayer>(name);
2574
2575 // InputToX weights
2576 layer->m_QuantizedLstmParameters.m_InputToInputWeights =
2577 std::make_shared<ScopedTensorHandle>(params.GetInputToInputWeights());
2578 layer->m_QuantizedLstmParameters.m_InputToForgetWeights =
2579 std::make_shared<ScopedTensorHandle>(params.GetInputToForgetWeights());
2580 layer->m_QuantizedLstmParameters.m_InputToCellWeights =
2581 std::make_shared<ScopedTensorHandle>(params.GetInputToCellWeights());
2582 layer->m_QuantizedLstmParameters.m_InputToOutputWeights =
2583 std::make_shared<ScopedTensorHandle>(params.GetInputToOutputWeights());
2584
2585 // RecurrentToX weights
2586 layer->m_QuantizedLstmParameters.m_RecurrentToInputWeights =
2587 std::make_shared<ScopedTensorHandle>(params.GetRecurrentToInputWeights());
2588 layer->m_QuantizedLstmParameters.m_RecurrentToForgetWeights =
2589 std::make_shared<ScopedTensorHandle>(params.GetRecurrentToForgetWeights());
2590 layer->m_QuantizedLstmParameters.m_RecurrentToCellWeights =
2591 std::make_shared<ScopedTensorHandle>(params.GetRecurrentToCellWeights());
2592 layer->m_QuantizedLstmParameters.m_RecurrentToOutputWeights =
2593 std::make_shared<ScopedTensorHandle>(params.GetRecurrentToOutputWeights());
2594
2595 // Bias
2596 layer->m_QuantizedLstmParameters.m_InputGateBias =
2597 std::make_shared<ScopedTensorHandle>(params.GetInputGateBias());
2598 layer->m_QuantizedLstmParameters.m_ForgetGateBias =
2599 std::make_shared<ScopedTensorHandle>(params.GetForgetGateBias());
2600 layer->m_QuantizedLstmParameters.m_CellBias =
2601 std::make_shared<ScopedTensorHandle>(params.GetCellBias());
2602 layer->m_QuantizedLstmParameters.m_OutputGateBias =
2603 std::make_shared<ScopedTensorHandle>(params.GetOutputGateBias());
2604
2605 return layer;
2606 }
2607
2608 IConnectableLayer* NetworkImpl::AddQLstmLayer(const QLstmDescriptor& descriptor,
2609 const LstmInputParams& params,
2610 const char* name)
2611 {
2612 const auto layer = m_Graph->AddLayer<QLstmLayer>(descriptor, name);
2613
2614 // QLstm Basic Parameters
2615 layer->m_BasicParameters.m_InputToForgetWeights =
2616 std::make_shared<ScopedTensorHandle>(*(params.m_InputToForgetWeights));
2617 layer->m_BasicParameters.m_InputToCellWeights =
2618 std::make_shared<ScopedTensorHandle>(*(params.m_InputToCellWeights));
2619 layer->m_BasicParameters.m_InputToOutputWeights =
2620 std::make_shared<ScopedTensorHandle>(*(params.m_InputToOutputWeights));
2621 layer->m_BasicParameters.m_RecurrentToForgetWeights =
2622 std::make_shared<ScopedTensorHandle>(*(params.m_RecurrentToForgetWeights));
2623 layer->m_BasicParameters.m_RecurrentToCellWeights =
2624 std::make_shared<ScopedTensorHandle>(*(params.m_RecurrentToCellWeights));
2625 layer->m_BasicParameters.m_RecurrentToOutputWeights =
2626 std::make_shared<ScopedTensorHandle>(*(params.m_RecurrentToOutputWeights));
2627 layer->m_BasicParameters.m_ForgetGateBias =
2628 std::make_shared<ScopedTensorHandle>(*(params.m_ForgetGateBias));
2629 layer->m_BasicParameters.m_CellBias =
2630 std::make_shared<ScopedTensorHandle>(*(params.m_CellBias));
2631 layer->m_BasicParameters.m_OutputGateBias =
2632 std::make_shared<ScopedTensorHandle>(*(params.m_OutputGateBias));
2633
2634 // QLstm Cifg parameters
2635 if(!descriptor.m_CifgEnabled)
2636 {
2637 if(params.m_InputToInputWeights == nullptr)
2638 {
2639 throw InvalidArgumentException("AddQLstmLayer: Input To Input Weights cannot be NULL");
2640 }
2641
2642 if(params.m_RecurrentToInputWeights == nullptr)
2643 {
2644 throw InvalidArgumentException(
2645 "AddQLstmLayer: Recurrent To Input Weights cannot be NULL");
2646 }
2647
2648 if(params.m_InputGateBias == nullptr)
2649 {
2650 throw InvalidArgumentException("AddQLstmLayer: Input Gate Bias cannot be NULL");
2651 }
2652
2653 layer->m_CifgParameters.m_InputToInputWeights =
2654 std::make_shared<ScopedTensorHandle>(*(params.m_InputToInputWeights));
2655 layer->m_CifgParameters.m_RecurrentToInputWeights =
2656 std::make_shared<ScopedTensorHandle>(*(params.m_RecurrentToInputWeights));
2657 layer->m_CifgParameters.m_InputGateBias =
2658 std::make_shared<ScopedTensorHandle>(*(params.m_InputGateBias));
2659 }
2660
2661 // QLstm Projection parameters
2662 if(descriptor.m_ProjectionEnabled)
2663 {
2664 if(params.m_ProjectionWeights == nullptr)
2665 {
2666 throw InvalidArgumentException("AddQLstmLayer: Projection Weights cannot be NULL");
2667 }
2668
2669 layer->m_ProjectionParameters.m_ProjectionWeights =
2670 std::make_shared<ScopedTensorHandle>(*(params.m_ProjectionWeights));
2671
2672 // Projection bias is optional even if projection is enabled
2673 if(params.m_ProjectionBias != nullptr)
2674 {
2675 layer->m_ProjectionParameters.m_ProjectionBias =
2676 std::make_shared<ScopedTensorHandle>(*(params.m_ProjectionBias));
2677 }
2678
2679 }
2680
2681 // QLstm Peephole params
2682 if(descriptor.m_PeepholeEnabled)
2683 {
2684 if(params.m_CellToForgetWeights == nullptr)
2685 {
2686 throw InvalidArgumentException("AddQLstmLayer: Cell To Forget Weights cannot be NULL");
2687 }
2688
2689 if(params.m_CellToOutputWeights == nullptr)
2690 {
2691 throw InvalidArgumentException("AddQLstmLayer: Cell To Output Weights cannot be NULL");
2692 }
2693
2694 if(!descriptor.m_CifgEnabled)
2695 {
2696 if(params.m_CellToInputWeights == nullptr)
2697 {
2698 throw InvalidArgumentException("AddQLstmLayer: Cell To Input Weights cannot be NULL");
2699 }
2700
2701 layer->m_PeepholeParameters.m_CellToInputWeights =
2702 std::make_shared<ScopedTensorHandle>(*(params.m_CellToInputWeights));
2703 }
2704
2705 layer->m_PeepholeParameters.m_CellToForgetWeights =
2706 std::make_shared<ScopedTensorHandle>(*(params.m_CellToForgetWeights));
2707 layer->m_PeepholeParameters.m_CellToOutputWeights =
2708 std::make_shared<ScopedTensorHandle>(*(params.m_CellToOutputWeights));
2709 }
2710
2711 // QLstm Layer Normalization params
2712 if(descriptor.m_LayerNormEnabled)
2713 {
2714 if(params.m_ForgetLayerNormWeights == nullptr)
2715 {
2716 throw InvalidArgumentException("AddQLstmLayer: Forget layer normalization weights cannot be NULL");
2717 }
2718
2719 if(params.m_CellLayerNormWeights == nullptr)
2720 {
2721 throw InvalidArgumentException("AddQLstmLayer: Cell layer normalization weights cannot be NULL");
2722 }
2723
2724 if(params.m_OutputLayerNormWeights == nullptr)
2725 {
2726 throw InvalidArgumentException("AddQLstmLayer: Output layer normalization weights cannot be NULL");
2727 }
2728
2729 if(!descriptor.m_CifgEnabled)
2730 {
2731 if(params.m_InputLayerNormWeights == nullptr)
2732 {
2733 throw InvalidArgumentException("AddQLstmLayer: Input layer normalization weights cannot be NULL");
2734 }
2735
2736 layer->m_LayerNormParameters.m_InputLayerNormWeights =
2737 std::make_shared<ScopedTensorHandle>(*(params.m_InputLayerNormWeights));
2738 }
2739
2740 layer->m_LayerNormParameters.m_ForgetLayerNormWeights =
2741 std::make_shared<ScopedTensorHandle>(*(params.m_ForgetLayerNormWeights));
2742 layer->m_LayerNormParameters.m_CellLayerNormWeights =
2743 std::make_shared<ScopedTensorHandle>(*(params.m_CellLayerNormWeights));
2744 layer->m_LayerNormParameters.m_OutputLayerNormWeights =
2745 std::make_shared<ScopedTensorHandle>(*(params.m_OutputLayerNormWeights));
2746 }
2747 return layer;
2748 }
2749
2750 IConnectableLayer* NetworkImpl::AddLogicalBinaryLayer(const LogicalBinaryDescriptor& logicalBinaryDescriptor,
2751 const char* name)
2752 {
2753 return m_Graph->AddLayer<LogicalBinaryLayer>(logicalBinaryDescriptor, name);
2754 }
2755
2756 IConnectableLayer* NetworkImpl::AddUnidirectionalSequenceLstmLayer(
2757 const UnidirectionalSequenceLstmDescriptor& descriptor,
2758 const LstmInputParams& params,
2759 const char* name)
2760 {
2761 const auto layer = m_Graph->AddLayer<UnidirectionalSequenceLstmLayer>(descriptor, name);
2762
2763 //Lstm Basic Parameters
2764 layer->m_BasicParameters.m_InputToForgetWeights =
2765 std::make_shared<ScopedTensorHandle>(*(params.m_InputToForgetWeights));
2766 layer->m_BasicParameters.m_InputToCellWeights =
2767 std::make_shared<ScopedTensorHandle>(*(params.m_InputToCellWeights));
2768 layer->m_BasicParameters.m_InputToOutputWeights =
2769 std::make_shared<ScopedTensorHandle>(*(params.m_InputToOutputWeights));
2770 layer->m_BasicParameters.m_RecurrentToForgetWeights =
2771 std::make_shared<ScopedTensorHandle>(*(params.m_RecurrentToForgetWeights));
2772 layer->m_BasicParameters.m_RecurrentToCellWeights =
2773 std::make_shared<ScopedTensorHandle>(*(params.m_RecurrentToCellWeights));
2774 layer->m_BasicParameters.m_RecurrentToOutputWeights =
2775 std::make_shared<ScopedTensorHandle>(*(params.m_RecurrentToOutputWeights));
2776 layer->m_BasicParameters.m_ForgetGateBias =
2777 std::make_shared<ScopedTensorHandle>(*(params.m_ForgetGateBias));
2778 layer->m_BasicParameters.m_CellBias =
2779 std::make_shared<ScopedTensorHandle>(*(params.m_CellBias));
2780 layer->m_BasicParameters.m_OutputGateBias =
2781 std::make_shared<ScopedTensorHandle>(*(params.m_OutputGateBias));
2782
2783 //Lstm Cifg parameters
2784 if(!descriptor.m_CifgEnabled)
2785 {
2786 if(params.m_InputToInputWeights == nullptr)
2787 {
2788 throw InvalidArgumentException("AddUnidirectionalSequenceLstmLayer: Input To Input Weights cannot be NULL "
2789 "when CIFG is disabled.");
2790 }
2791 if(params.m_RecurrentToInputWeights == nullptr)
2792 {
2793 throw InvalidArgumentException(
2794 "AddUnidirectionalSequenceLstmLayer: Recurrent To Input Weights cannot be NULL "
2795 "when CIFG is disabled.");
2796 }
2797 if(params.m_InputGateBias == nullptr)
2798 {
2799 throw InvalidArgumentException("AddUnidirectionalSequenceLstmLayer: Input Gate Bias cannot be NULL "
2800 "when CIFG is disabled.");
2801 }
2802 layer->m_CifgParameters.m_InputToInputWeights =
2803 std::make_shared<ScopedTensorHandle>(*(params.m_InputToInputWeights));
2804 layer->m_CifgParameters.m_RecurrentToInputWeights =
2805 std::make_shared<ScopedTensorHandle>(*(params.m_RecurrentToInputWeights));
2806 layer->m_CifgParameters.m_InputGateBias =
2807 std::make_shared<ScopedTensorHandle>(*(params.m_InputGateBias));
2808 }
2809
2810 //Lstm projection parameters
2811 if(descriptor.m_ProjectionEnabled)
2812 {
2813 if(params.m_ProjectionWeights == nullptr)
2814 {
2815 throw InvalidArgumentException("AddUnidirectionalSequenceLstmLayer: Projection Weights cannot be NULL "
2816 "when projection is enabled.");
2817 }
2818 layer->m_ProjectionParameters.m_ProjectionWeights =
2819 std::make_shared<ScopedTensorHandle>(*(params.m_ProjectionWeights));
2820 if(params.m_ProjectionBias != nullptr)
2821 {
2822 layer->m_ProjectionParameters.m_ProjectionBias =
2823 std::make_shared<ScopedTensorHandle>(*(params.m_ProjectionBias));
2824 }
2825 }
2826
2827 //Lstm Peephole params
2828 if(descriptor.m_PeepholeEnabled)
2829 {
2830 if(!descriptor.m_CifgEnabled)
2831 {
2832 if(params.m_CellToInputWeights == nullptr)
2833 {
2834 throw InvalidArgumentException("AddUnidirectionalSequenceLstmLayer: Cell To Input Weights "
2835 "cannot be NULL when Peephole is enabled and CIFG disabled.");
2836 }
2837
2838 layer->m_PeepholeParameters.m_CellToInputWeights =
2839 std::make_shared<ScopedTensorHandle>(*(params.m_CellToInputWeights));
2840 }
2841
2842 if(params.m_CellToForgetWeights == nullptr)
2843 {
2844 throw InvalidArgumentException("AddUnidirectionalSequenceLstmLayer: Cell To Forget Weights cannot be NULL "
2845 "when Peephole is enabled.");
2846 }
2847 if(params.m_CellToOutputWeights == nullptr)
2848 {
2849 throw InvalidArgumentException("AddUnidirectionalSequenceLstmLayer: Cell To Output Weights cannot be NULL "
2850 "when Peephole is enabled.");
2851 }
2852
2853 layer->m_PeepholeParameters.m_CellToForgetWeights =
2854 std::make_shared<ScopedTensorHandle>(*(params.m_CellToForgetWeights));
2855 layer->m_PeepholeParameters.m_CellToOutputWeights =
2856 std::make_shared<ScopedTensorHandle>(*(params.m_CellToOutputWeights));
2857 }
2858
2859 //Lstm Layer Normalization params
2860 if(descriptor.m_LayerNormEnabled)
2861 {
2862 if(!descriptor.m_CifgEnabled)
2863 {
2864 if(params.m_InputLayerNormWeights == nullptr)
2865 {
2866 throw InvalidArgumentException("AddUnidirectionalSequenceLstmLayer: Input layer normalization weights "
2867 "cannot be NULL when layer normalization is enabled and CIFG disabled.");
2868 }
2869 layer->m_LayerNormParameters.m_InputLayerNormWeights =
2870 std::make_shared<ScopedTensorHandle>(*(params.m_InputLayerNormWeights));
2871 }
2872
2873 if(params.m_ForgetLayerNormWeights == nullptr)
2874 {
2875 throw InvalidArgumentException("AddUnidirectionalSequenceLstmLayer: Forget layer normalization weights "
2876 "cannot be NULL when layer normalization is enabled.");
2877 }
2878 if(params.m_CellLayerNormWeights == nullptr)
2879 {
2880 throw InvalidArgumentException("AddUnidirectionalSequenceLstmLayer: Cell layer normalization weights "
2881 "cannot be NULL when layer normalization is enabled.");
2882 }
2883 if(params.m_OutputLayerNormWeights == nullptr)
2884 {
2885 throw InvalidArgumentException("AddUnidirectionalSequenceLstmLayer: Output layer normalization weights "
2886 "cannot be NULL when layer normalization is enabled.");
2887 }
2888 layer->m_LayerNormParameters.m_ForgetLayerNormWeights =
2889 std::make_shared<ScopedTensorHandle>(*(params.m_ForgetLayerNormWeights));
2890 layer->m_LayerNormParameters.m_CellLayerNormWeights =
2891 std::make_shared<ScopedTensorHandle>(*(params.m_CellLayerNormWeights));
2892 layer->m_LayerNormParameters.m_OutputLayerNormWeights =
2893 std::make_shared<ScopedTensorHandle>(*(params.m_OutputLayerNormWeights));
2894 }
2895 return layer;
2896 }
2897
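// Illustrative usage sketch (not part of the original source): with CIFG disabled,
// the checks above require the three input-gate tensors on top of the nine basic
// parameters. All tensor objects named here are placeholders assumed to exist.
//
//     UnidirectionalSequenceLstmDescriptor lstmDesc;
//     lstmDesc.m_CifgEnabled = false; // makes the three tensors below mandatory
//
//     LstmInputParams lstmParams;
//     // ... the nine basic weights/biases are assigned here ...
//     lstmParams.m_InputToInputWeights     = &inputToInputWeights;
//     lstmParams.m_RecurrentToInputWeights = &recurrentToInputWeights;
//     lstmParams.m_InputGateBias           = &inputGateBias;
//
//     IConnectableLayer* lstm =
//         network->AddUnidirectionalSequenceLstmLayer(lstmDesc, lstmParams, "seq-lstm");
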
IConnectableLayer* NetworkImpl::AddBatchMatMulLayer(const BatchMatMulDescriptor& desc, const char* name)
{
    return m_Graph->AddLayer<BatchMatMulLayer>(desc, name);
}

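// Illustrative usage sketch (not part of the original source): a default-constructed
// BatchMatMulDescriptor performs a plain batched matrix multiply; the transpose flags
// are existing descriptor fields, while the values chosen are placeholder assumptions.
//
//     BatchMatMulDescriptor bmmDesc;
//     bmmDesc.m_TransposeX = false;
//     bmmDesc.m_TransposeY = true; // multiply input X by the transpose of input Y
//     IConnectableLayer* bmm = network->AddBatchMatMulLayer(bmmDesc, "batchMatMul");
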
IConnectableLayer* NetworkImpl::AddPrecompiledLayer(const PreCompiledDescriptor& preCompiledDescriptor,
                                                    CompiledBlobPtr compiledBlobPtr,
                                                    const Optional<BackendId>& backend,
                                                    const char* name)
{
    // This method is intended for use by backends.
    PreCompiledLayer* layer;
    if (name)
    {
        layer = m_Graph->AddLayer<PreCompiledLayer>(preCompiledDescriptor, name);
    }
    else
    {
        layer = m_Graph->AddLayer<PreCompiledLayer>(preCompiledDescriptor, "pre-compiled");
    }

    // Assign the pre-compiled object to the layer. Pass only one compiled network:
    // Arm NN does not currently support multiple pre-compiled objects in a single
    // pre-compiled layer.
    layer->SetPreCompiledObject(std::move(compiledBlobPtr));

    if (backend.has_value())
    {
        layer->SetBackendId(backend.value());
    }
    else if (layer->GetBackendHint().has_value())
    {
        layer->SetBackendId(layer->GetBackendHint().value());
    }

    return layer;
}

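// Illustrative usage sketch (not part of the original source): a backend user that has
// compiled a subgraph out-of-band could wrap the result like this. 'backendBlobHandle'
// and its deleter are hypothetical stand-ins for backend-specific objects.
//
//     PreCompiledDescriptor pcDesc(/*numInputSlots=*/1, /*numOutputSlots=*/1);
//     CompiledBlobPtr blob(backendBlobHandle, [](const void*) { /* backend frees it */ });
//     IConnectableLayer* pcl = network->AddPrecompiledLayer(pcDesc, std::move(blob),
//                                                           Optional<BackendId>("GpuAcc"));
//     // With name == nullptr, the layer falls back to the default name "pre-compiled".
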
void NetworkImpl::ExecuteStrategy(IStrategy& strategy) const
{
    for (auto layer : GetGraph())
    {
        layer->ExecuteStrategy(strategy);
    }
}

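// Illustrative sketch (not part of the original source): a minimal IStrategy that the
// loop above would invoke once per layer. The override signature follows
// include/armnn/IStrategy.hpp at the time of writing; the class itself is hypothetical.
//
//     struct LayerCounter final : public IStrategy
//     {
//         void ExecuteStrategy(const IConnectableLayer* layer,
//                              const BaseDescriptor& descriptor,
//                              const std::vector<ConstTensor>& constants,
//                              const char* name,
//                              const LayerBindingId id = 0) override
//         {
//             IgnoreUnused(layer, descriptor, constants, name, id);
//             ++m_Count;
//         }
//         unsigned int m_Count = 0;
//     };
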
OptimizedNetworkImpl::OptimizedNetworkImpl(const OptimizedNetworkImpl& other, const ModelOptions& modelOptions)
    : m_Graph(new Graph(*other.m_Graph))
    , m_Guid(arm::pipe::IProfilingService::GetNextGuid())
    , m_ModelOptions(modelOptions)
{
}

OptimizedNetworkImpl::OptimizedNetworkImpl(std::unique_ptr<Graph> graph)
    : m_Graph(std::move(graph)), m_Guid(arm::pipe::IProfilingService::GetNextGuid())
{
}

OptimizedNetworkImpl::OptimizedNetworkImpl(std::unique_ptr<Graph> graph, const ModelOptions& modelOptions)
    : m_Graph(std::move(graph)), m_Guid(arm::pipe::IProfilingService::GetNextGuid()), m_ModelOptions(modelOptions)
{
}

OptimizedNetworkImpl::~OptimizedNetworkImpl()
{
}

void IOptimizedNetwork::ExecuteStrategy(IStrategy& strategy) const
{
    pOptimizedNetworkImpl->ExecuteStrategy(strategy);
}

void OptimizedNetworkImpl::ExecuteStrategy(IStrategy& strategy) const
{
    for (auto layer : GetGraph())
    {
        layer->ExecuteStrategy(strategy);
    }
}

} // namespace armnn