1 // 2 // Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved. 3 // SPDX-License-Identifier: MIT 4 // 5 #pragma once 6 7 #include <armnn/BackendOptions.hpp> 8 #include <armnn/Deprecated.hpp> 9 #include <armnn/DescriptorsFwd.hpp> 10 #include <armnn/IStrategy.hpp> 11 #include <armnn/NetworkFwd.hpp> 12 #include <armnn/Optional.hpp> 13 #include <armnn/TensorFwd.hpp> 14 #include <armnn/Logging.hpp> 15 #include <armnn/backends/TensorHandle.hpp> 16 17 #include <memory> 18 #include <vector> 19 20 namespace armnn 21 { 22 /// @brief An input connection slot for a layer. 23 /// The input slot can be connected to an output slot of the preceding layer in the graph. 24 /// Only one connection to the input slot is allowed. 25 class IInputSlot 26 { 27 public: 28 virtual const IOutputSlot* GetConnection() const = 0; 29 virtual IOutputSlot* GetConnection() = 0; 30 virtual const IConnectableLayer& GetOwningIConnectableLayer() const = 0; 31 virtual IConnectableLayer& GetOwningIConnectableLayer() = 0; 32 virtual unsigned int GetSlotIndex() const = 0; 33 34 protected: 35 /// Not user deletable. ~IInputSlot()36 ~IInputSlot() {} 37 }; 38 39 /// @brief An output connection slot for a layer. 40 /// The output slot may be connected to 1 or more input slots of subsequent layers in the graph. 
41 class IOutputSlot 42 { 43 public: 44 virtual unsigned int GetNumConnections() const = 0; 45 virtual const IInputSlot* GetConnection(unsigned int index) const = 0; 46 virtual IInputSlot* GetConnection(unsigned int outputindex) = 0; 47 48 virtual void SetTensorInfo(const TensorInfo& tensorInfo) = 0; 49 virtual const TensorInfo& GetTensorInfo() const = 0; 50 virtual bool IsTensorInfoSet() const = 0; 51 52 virtual int Connect(IInputSlot& destination) = 0; 53 virtual void Disconnect(IInputSlot& slot) = 0; 54 55 virtual unsigned int CalculateIndexOnOwner() const = 0; 56 57 virtual LayerGuid GetOwningLayerGuid() const = 0; 58 59 virtual const IConnectableLayer& GetOwningIConnectableLayer() const = 0; 60 virtual IConnectableLayer& GetOwningIConnectableLayer() = 0; 61 62 protected: 63 /// Not user deletable. ~IOutputSlot()64 ~IOutputSlot() {} 65 }; 66 67 /// @brief Interface for a layer that is connectable to other layers via InputSlots and OutputSlots. 68 class IConnectableLayer 69 { 70 public: 71 /// Returns the name of the layer 72 virtual const char* GetName() const = 0; 73 74 /// Returns the number of connectable input slots 75 virtual unsigned int GetNumInputSlots() const = 0; 76 77 /// Returns the number of connectable output slots 78 virtual unsigned int GetNumOutputSlots() const = 0; 79 80 /// Get a const input slot handle by slot index 81 virtual const IInputSlot& GetInputSlot(unsigned int index) const = 0; 82 83 /// Get the input slot handle by slot index 84 virtual IInputSlot& GetInputSlot(unsigned int index) = 0; 85 86 /// Get the const output slot handle by slot index 87 virtual const IOutputSlot& GetOutputSlot(unsigned int index) const = 0; 88 89 /// Get the output slot handle by slot index 90 virtual IOutputSlot& GetOutputSlot(unsigned int index) = 0; 91 92 /// Infer the shape of the output(s) based on the provided input shape(s) 93 virtual std::vector<TensorShape> InferOutputShapes(const std::vector<TensorShape>& inputShapes) const = 0; 94 95 /// Returns 
the unique id of the layer 96 virtual LayerGuid GetGuid() const = 0; 97 98 /// Apply a visitor to this layer 99 virtual void ExecuteStrategy(IStrategy& strategy) const = 0; 100 101 /// Provide a hint for the optimizer as to which backend to prefer for this layer. 102 /// By providing a BackendSelectionHint there is no guarantee the input backend supports that layer. 103 /// If IsLayerSupported() returns false with the backend hint, we default to calling IsLayerSupported() 104 /// on the BackendPreferences vector. Use SetBackendId() if we can guarantee a backend supports that 105 /// layer (IsLayerSupported returns true for a specific backend). 106 virtual void BackendSelectionHint(Optional<BackendId> backend) = 0; 107 108 /// Returns the armnn::LayerType of this layer 109 virtual LayerType GetType() const = 0; 110 111 /// If the layer has a descriptor return it. 112 /// The base descriptor can then be cast to the correct descriptor class. 113 /// If the layer has no associated descriptor a struct of type NullDescriptor will be returned. 114 /// Note: NullDescriptors can be detected because they return true when 115 /// the BaseDescriptor IsNull function is invoked. 116 virtual const BaseDescriptor& GetParameters() const = 0; 117 118 /// Set the backend of the IConnectableLayer. 119 /// By using SetBackendId() we guarantee that the input backend supports that 120 /// layer (IsLayerSupported returns true for a specific backend). If there is 121 /// no guarantee the input backend supports that layer use BackendSelectionHint(). 122 virtual void SetBackendId(const BackendId& id) = 0; 123 124 using ConstantTensors = std::vector<std::reference_wrapper<std::shared_ptr<ConstTensorHandle>>>; 125 126 // Returns ConstantTensors of this Layer if it has any, otherwise returns empty vector. 
127 virtual ConstantTensors GetConstantTensorsByRef() = 0; 128 129 using ImmutableConstantTensors = std::vector<std::reference_wrapper<const std::shared_ptr<ConstTensorHandle>>>; 130 131 // Returns ConstantTensors of this Layer if it has any, otherwise returns empty vector. 132 virtual ImmutableConstantTensors GetConstantTensorsByRef() const = 0; 133 134 protected: 135 /// Objects are not deletable via the handle ~IConnectableLayer()136 ~IConnectableLayer() {} 137 }; 138 139 struct OptimizerOptions 140 { 141 ARMNN_DEPRECATED_MSG_REMOVAL_DATE("Use ABI stable OptimizerOptionsOpaque instead.", "24.02") OptimizerOptionsarmnn::OptimizerOptions142 OptimizerOptions() 143 : m_ReduceFp32ToFp16(false) 144 , m_Debug(false) 145 , m_DebugToFile(false) 146 , m_ReduceFp32ToBf16(false) 147 , m_shapeInferenceMethod(armnn::ShapeInferenceMethod::ValidateOnly) 148 , m_ImportEnabled(false) 149 , m_ModelOptions() 150 , m_ProfilingEnabled(false) 151 , m_ExportEnabled(false) 152 , m_AllowExpandedDims(false) 153 {} 154 155 ARMNN_DEPRECATED_MSG_REMOVAL_DATE("Use ABI stable OptimizerOptionsOpaque instead.", "24.02") OptimizerOptionsarmnn::OptimizerOptions156 OptimizerOptions(bool reduceFp32ToFp16, bool debug, bool reduceFp32ToBf16, bool importEnabled, 157 ModelOptions modelOptions = {}, bool exportEnabled = false, bool debugToFile = false) 158 : m_ReduceFp32ToFp16(reduceFp32ToFp16) 159 , m_Debug(debug) 160 , m_DebugToFile(debugToFile) 161 , m_ReduceFp32ToBf16(reduceFp32ToBf16) 162 , m_shapeInferenceMethod(armnn::ShapeInferenceMethod::ValidateOnly) 163 , m_ImportEnabled(importEnabled) 164 , m_ModelOptions(modelOptions) 165 , m_ProfilingEnabled(false) 166 , m_ExportEnabled(exportEnabled) 167 , m_AllowExpandedDims(false) 168 { 169 } 170 171 ARMNN_DEPRECATED_MSG_REMOVAL_DATE("Use ABI stable OptimizerOptionsOpaque instead.", "24.02") OptimizerOptionsarmnn::OptimizerOptions172 OptimizerOptions(bool reduceFp32ToFp16, bool debug, bool reduceFp32ToBf16 = false, 173 ShapeInferenceMethod 
shapeInferenceMethod = armnn::ShapeInferenceMethod::ValidateOnly, 174 bool importEnabled = false, ModelOptions modelOptions = {}, bool exportEnabled = false, 175 bool debugToFile = false, bool allowExpandedDims = false) 176 : m_ReduceFp32ToFp16(reduceFp32ToFp16) 177 , m_Debug(debug) 178 , m_DebugToFile(debugToFile) 179 , m_ReduceFp32ToBf16(reduceFp32ToBf16) 180 , m_shapeInferenceMethod(shapeInferenceMethod) 181 , m_ImportEnabled(importEnabled) 182 , m_ModelOptions(modelOptions) 183 , m_ProfilingEnabled(false) 184 , m_ExportEnabled(exportEnabled) 185 , m_AllowExpandedDims(allowExpandedDims) 186 { 187 } 188 ToStringarmnn::OptimizerOptions189 const std::string ToString() const 190 { 191 std::stringstream stream; 192 stream << "OptimizerOptions: \n"; 193 stream << "\tReduceFp32ToFp16: " << m_ReduceFp32ToFp16 << "\n"; 194 stream << "\tReduceFp32ToBf16: " << m_ReduceFp32ToBf16 << "\n"; 195 stream << "\tDebug: " << m_Debug << "\n"; 196 stream << "\tDebug to file: " << m_DebugToFile << "\n"; 197 stream << "\tShapeInferenceMethod: " << 198 (m_shapeInferenceMethod == ShapeInferenceMethod::ValidateOnly 199 ? "ValidateOnly" : "InferAndValidate") << "\n"; 200 stream << "\tImportEnabled: " << m_ImportEnabled << "\n"; 201 stream << "\tExportEnabled: " << m_ExportEnabled << "\n"; 202 stream << "\tProfilingEnabled: " << m_ProfilingEnabled << "\n"; 203 stream << "\tAllowExpandedDims: " << m_AllowExpandedDims << "\n"; 204 205 stream << "\tModelOptions: \n"; 206 for (auto optionsGroup : m_ModelOptions) 207 { 208 for (size_t i=0; i < optionsGroup.GetOptionCount(); i++) 209 { 210 const armnn::BackendOptions::BackendOption option = optionsGroup.GetOption(i); 211 stream << "\t\tBackend: " << optionsGroup.GetBackendId() << "\n" 212 << "\t\t\tOption: " << option.GetName() << "\n" 213 << "\t\t\tValue: " << std::string(option.GetValue().ToString()) << "\n"; 214 } 215 } 216 217 return stream.str(); 218 } 219 220 /// Reduces all Fp32 operators in the model to Fp16 for faster processing. 
221 /// @Note This feature works best if all operators of the model are in Fp32. ArmNN will add conversion layers 222 /// between layers that weren't in Fp32 in the first place or if the operator is not supported in Fp16. 223 /// The overhead of these conversions can lead to a slower overall performance if too many conversions are 224 /// required. 225 bool m_ReduceFp32ToFp16; 226 227 /// Add debug data for easier troubleshooting 228 bool m_Debug; 229 230 /// Pass debug data to separate output files for easier troubleshooting 231 bool m_DebugToFile; 232 233 /// @Note This feature has been replaced by enabling Fast Math in compute library backend options. 234 /// This is currently a placeholder option 235 bool m_ReduceFp32ToBf16; 236 237 /// Infer output size when not available 238 ShapeInferenceMethod m_shapeInferenceMethod; 239 240 /// Enable Import 241 bool m_ImportEnabled; 242 243 /// Enable Model Options 244 ModelOptions m_ModelOptions; 245 246 /// Enable profiling dump of the optimizer phase 247 bool m_ProfilingEnabled; 248 249 /// Enable Export 250 bool m_ExportEnabled; 251 252 /// When calculating tensor sizes, dimensions of size == 1 will be ignored 253 bool m_AllowExpandedDims; 254 }; 255 256 /// ArmNN performs an optimization on each model/network before it gets loaded for execution. OptimizerOptions provides 257 /// a set of features that allows the user to customize this optimization on a per model basis. 
/// Forward declaration of the private implementation (pimpl) holding OptimizerOptionsOpaque's state.
struct OptimizerOptionsOpaqueImpl;

/// ABI stable replacement for the deprecated OptimizerOptions struct: all state lives behind a
/// pimpl so new options can be added without breaking binary compatibility.
class OptimizerOptionsOpaque
{
public:
    OptimizerOptionsOpaque();
    OptimizerOptionsOpaque(const OptimizerOptionsOpaque& other);
    ~OptimizerOptionsOpaque();

    /// Converting constructor from the deprecated OptimizerOptions struct.
    OptimizerOptionsOpaque(const OptimizerOptions& OptimizerStruct);

    /// Copy assignment; the argument is taken by value.
    OptimizerOptionsOpaque& operator=(OptimizerOptionsOpaque other);

    OptimizerOptionsOpaque(bool reduceFp32ToFp16, bool debug, bool reduceFp32ToBf16, bool importEnabled,
                           ModelOptions modelOptions = {}, bool exportEnabled = false, bool debugToFile = false);

    OptimizerOptionsOpaque(bool reduceFp32ToFp16, bool debug, bool reduceFp32ToBf16 = false,
                           ShapeInferenceMethod shapeInferenceMethod = armnn::ShapeInferenceMethod::ValidateOnly,
                           bool importEnabled = false, ModelOptions modelOptions = {}, bool exportEnabled = false,
                           bool debugToFile = false, bool allowExpandedDims = false);

    /// Returns a human-readable summary of all option values.
    const std::string ToString() const;

    // --- Getters for the individual options ---

    bool GetProfilingEnabled() const;

    bool GetImportEnabled() const;

    bool GetExportEnabled() const;

    bool GetReduceFp32ToFp16() const;

    bool GetReduceFp32ToBf16() const;

    bool GetDebugEnabled() const;

    bool GetDebugToFileEnabled() const;

    bool GetAllowExpandedDims() const;

    armnn::ModelOptions GetModelOptions() const;

    armnn::ShapeInferenceMethod GetShapeInferenceMethod() const;

    // --- Setters for the individual options ---

    void SetImportEnabled(bool ImportState);

    void SetExportEnabled(bool ExportState);

    void SetProfilingEnabled(bool ProfilingState);

    void SetDebugEnabled(bool DebugState);

    void SetDebugToFileEnabled(bool DebugFileState);

    void SetReduceFp32ToFp16(bool ReduceFp32ToFp16State);

    void SetShapeInferenceMethod(armnn::ShapeInferenceMethod ShapeInferenceMethodType);

    /// Appends a backend-specific options group to the model options.
    void AddModelOption(armnn::BackendOptions);

    void SetAllowExpandedDims(bool ExpandedDimsAllowed);

private:

    // Pimpl: all option state is stored out-of-line for ABI stability.
    std::unique_ptr<armnn::OptimizerOptionsOpaqueImpl> p_OptimizerOptionsImpl;

};

class IWorkloadFactory;
class NetworkImpl;
// Owning pointers with custom deleters so networks can be destroyed across the DLL/shared-object boundary.
using INetworkPtr = std::unique_ptr<INetwork, void(*)(INetwork* network)>;
using IOptimizedNetworkPtr = std::unique_ptr<IOptimizedNetwork, void(*)(IOptimizedNetwork* network)>;

using CompiledBlobDeleter = std::function<void(const void*)>;
using CompiledBlobPtr = std::unique_ptr<void, CompiledBlobDeleter>;

/// Main network class which provides the interface for building up a neural network.
/// This object is subsequently required by the IRuntime::Load() method.
class INetwork
{
public:
    static INetwork* CreateRaw(const NetworkOptions& networkOptions = {});
    static INetworkPtr Create(const NetworkOptions& networkOptions = {});
    static void Destroy(INetwork* network);

    Status PrintGraph();

    /// Adds an input layer to the network.
    /// @param id - User generated id to uniquely identify a particular input. The same id needs to be specified.
    ///             when passing the inputs to the IRuntime::EnqueueWorkload() function.
    /// @param name - Optional name for the layer.
    /// @return - Interface for configuring the layer.
    IConnectableLayer* AddInputLayer(LayerBindingId id, const char* name = nullptr);

    /// Adds an ArgMinMax layer to the network.
    /// @param desc - Parameters for the ArgMinMax operation.
    /// @param name - Optional name for the layer.
    /// @return - Interface for configuring the layer.
    IConnectableLayer* AddArgMinMaxLayer(const ArgMinMaxDescriptor& desc,
                                         const char* name = nullptr);

    /// Adds a cast layer to the network.
    /// @param name - Optional name for the layer.
    /// @return - Interface for configuring the layer.
    IConnectableLayer* AddCastLayer(const char* name = nullptr);

    /// Add a Comparison layer to the network.
    /// @param name - Optional name for the layer.
365 /// @param desc - Descriptor for the comparison operation. 366 /// @return - Interface for configuring the layer. 367 IConnectableLayer* AddComparisonLayer(const ComparisonDescriptor& comparisonDescriptor, 368 const char* name = nullptr); 369 370 /// Adds a concatenation layer to the network. 371 /// @param concatDescriptor - ConcatDescriptor (synonym for OriginsDescriptor) to configure the concatenation 372 /// process. Number of Views must be equal to the number of inputs, and their order 373 /// must match - e.g. first view corresponds to the first input, second view to the 374 /// second input, etc.... 375 /// @param name - Optional name for the layer. 376 /// @return - Interface for configuring the layer. 377 IConnectableLayer* AddConcatLayer(const ConcatDescriptor& concatDescriptor, 378 const char* name = nullptr); 379 380 /// Adds a 2D convolution layer to the network. 381 /// @param convolution2dDescriptor - Description of the 2D convolution layer. 382 /// @param name - Optional name for the layer. 383 /// @return - Interface for configuring the layer. 384 IConnectableLayer* AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor, 385 const char* name = nullptr); 386 387 /// Adds a 3D convolution layer to the network. 388 /// @param convolution3dDescriptor - Description of the 3D convolution layer. 389 /// @param name - Optional name for the layer. 390 /// @return - Interface for configuring the layer. 391 IConnectableLayer* AddConvolution3dLayer(const Convolution3dDescriptor& convolution3dDescriptor, 392 const char* name = nullptr); 393 394 /// Adds a depth to space layer to the network. 395 /// @param depthToSpaceDescriptor - Parameters for the depth to space operation. 396 /// @param name - Optional name for the layer. 397 /// @return - Interface for configuring the layer. 
398 IConnectableLayer* AddDepthToSpaceLayer(const DepthToSpaceDescriptor& depthToSpaceDescriptor, 399 const char* name = nullptr); 400 401 /// Adds a 2D depthwise convolution layer to the network. 402 /// @param convolution2dDescriptor - Description of the 2D depthwise convolution layer. 403 /// @param name - Optional name for the layer. 404 /// @return - Interface for configuring the layer. 405 IConnectableLayer* AddDepthwiseConvolution2dLayer(const DepthwiseConvolution2dDescriptor& convolution2dDescriptor, 406 const char* name = nullptr); 407 408 /// Adds a Dequantize layer to the network. 409 /// @return - Interface for configuring the layer. 410 IConnectableLayer* AddDequantizeLayer(const char* name = nullptr); 411 412 /// Adds a Detection PostProcess layer to the network. 413 /// @param descriptor - Description of the Detection PostProcess layer. 414 /// @param anchors - Tensor for anchors. 415 /// @param name - Optional name for the layer. 416 /// @return - Interface for configuring the layer. 417 IConnectableLayer* AddDetectionPostProcessLayer( 418 const DetectionPostProcessDescriptor& descriptor, 419 const ConstTensor& anchors, 420 const char* name = nullptr); 421 422 /// Add an ElementwiseBinary layer to the network. 423 /// @param name - Optional name for the layer. 424 /// @param desc - Descriptor for the elementwiseBinary operations. 425 /// @return - Interface for configuring the layer. 426 IConnectableLayer* AddElementwiseBinaryLayer(const ElementwiseBinaryDescriptor& elementwiseUnaryDescriptor, 427 const char* name = nullptr); 428 429 /// Add an ElementwiseUnary layer to the network. 430 /// @param name - Optional name for the layer. 431 /// @param desc - Descriptor for the elementwiseUnary operations. 432 /// @return - Interface for configuring the layer. 433 IConnectableLayer* AddElementwiseUnaryLayer(const ElementwiseUnaryDescriptor& elementwiseUnaryDescriptor, 434 const char* name = nullptr); 435 436 /// Add an Fill layer to the network. 
437 /// @param name - Optional name for the layer. 438 /// @param fillDescriptor - Descriptor for the fill operation. 439 /// @return - Interface for configuring the layer. 440 IConnectableLayer* AddFillLayer(const FillDescriptor& fillDescriptor, 441 const char* name = nullptr); 442 443 444 /// Adds a fully connected layer to the network. 445 /// @param fullyConnectedDescriptor - Description of the fully connected layer. 446 /// @return - Interface for configuring the layer. 447 /// 448 /// @note Weights and biases are passed in as inputs. If they are constant tensors you can simply store 449 /// them in a ConstantLayer as seen below. A full example can be found in samples/SimpleSample.cpp. 450 /// 451 /// @code 452 /// // Make sure the IsConstant flag is set on the weightsInfo before passing it to the ConstTensor. 453 /// ConstTensor weights(weightsInfo, weightsData); 454 /// 455 /// // Constant layer that now holds weights data for FullyConnected 456 /// IConnectableLayer* const constantWeightsLayer = myNetwork->AddConstantLayer(weights, "weights"); 457 /// 458 /// FullyConnectedDescriptor fullyConnectedDesc; 459 /// IConnectableLayer* const fullyConnectedLayer = myNetwork->AddFullyConnectedLayer(fullyConnectedDesc, 460 /// "fully connected"); 461 /// IConnectableLayer* InputLayer = myNetwork->AddInputLayer(0); 462 /// InputLayer->GetOutputSlot(0).Connect(fullyConnectedLayer->GetInputSlot(0)); 463 /// constantWeightsLayer->GetOutputSlot(0).Connect(fullyConnectedLayer->GetInputSlot(1)); 464 /// @endcode 465 IConnectableLayer* AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor, 466 const char* name = nullptr); 467 468 /// Adds a permute layer to the network. 469 /// @param permuteDescriptor - PermuteDescriptor to configure the permute. 470 /// @param name - Optional name for the layer. 471 /// @return - Interface for configuring the layer. 
472 IConnectableLayer* AddPermuteLayer(const PermuteDescriptor& permuteDescriptor, 473 const char* name = nullptr); 474 475 /// Adds a batch to space ND layer to the network. 476 /// @param batchToSpaceNdDescriptor - Description of the layer. 477 /// @param name - Optional name for the layer. 478 /// @return - Interface for configuring the layer. 479 IConnectableLayer* AddBatchToSpaceNdLayer(const BatchToSpaceNdDescriptor& batchToSpaceNdDescriptor, 480 const char* name = nullptr); 481 482 /// Adds a 2D pooling layer to the network. 483 /// @param pooling2dDescriptor - Pooling2dDescriptor to configure the pooling. 484 /// @param name - Optional name for the layer. 485 /// @return - Interface for configuring the layer. 486 IConnectableLayer* AddPooling2dLayer(const Pooling2dDescriptor& pooling2dDescriptor, 487 const char* name = nullptr); 488 489 /// Adds a 3D pooling layer to the network. 490 /// @param pooling3dDescriptor - Pooling3dDescriptor to configure the pooling. 491 /// @param name - Optional name for the layer. 492 /// @return - Interface for configuring the layer. 493 IConnectableLayer* AddPooling3dLayer(const Pooling3dDescriptor& pooling3dDescriptor, 494 const char* name = nullptr); 495 496 /// Adds a Precompiled layer to the network. 497 /// Method use is for backend users. 498 /// @param preCompiledDescriptor - PreCompiledDescriptor contains parameters for the Precompiled layer. 499 /// @param compiledBlobPtr - CompiledBlobPtr pre-compiled object set for the Precompiled layer. 500 /// @param backend - optional BackendId set for the Precompiled layer. 501 /// @return - Interface for configuring the layer. 502 IConnectableLayer* AddPrecompiledLayer(const PreCompiledDescriptor& preCompiledDescriptor, 503 CompiledBlobPtr compiledBlobPtr, 504 const Optional<BackendId>& backend, 505 const char* name = nullptr); 506 507 /// Adds an activation layer to the network. 508 /// @param activationDescriptor - ActivationDescriptor to configure the activation. 
509 /// @param name - Optional name for the layer. 510 /// @return - Interface for configuring the layer. 511 IConnectableLayer* AddActivationLayer(const ActivationDescriptor& activationDescriptor, 512 const char* name = nullptr); 513 514 /// Adds a normalization layer to the network. 515 /// @param normalizationDescriptor - NormalizationDescriptor to configure the normalization. 516 /// @param name - Optional name for the layer. 517 /// @return - Interface for configuring the layer. 518 IConnectableLayer* AddNormalizationLayer(const NormalizationDescriptor& normalizationDescriptor, 519 const char* name = nullptr); 520 521 /// Adds a slice layer to the network. 522 /// @param sliceDescriptor - SliceDescriptor to configure the slice operation. 523 /// @param name - Optional name for the layer. 524 /// @return - Interface for configuring the layer. 525 IConnectableLayer* AddSliceLayer(const SliceDescriptor& sliceDescriptor, const char* name = nullptr); 526 527 /// Adds a softmax layer to the network. 528 /// If the data type is QAsymm8, then the output quantization parameters 529 /// must have a scale of 1/256 and an offset of 0 530 /// @param softmaxDescriptor - SoftmaxDescriptor to configure the softmax. 531 /// @param name - Optional name for the layer. 532 /// @return - Interface for configuring the layer. 533 IConnectableLayer* AddSoftmaxLayer(const SoftmaxDescriptor& softmaxDescriptor, 534 const char* name = nullptr); 535 536 /// Adds a splitter layer to the network. 537 /// @param splitterDescriptor - ViewsDescriptor to configure the splitting process. 538 /// Number of Views must be equal to the number of outputs, 539 /// and their order must match - e.g. first view corresponds to 540 /// the first output, second view to the second output, etc.... 541 /// @param name - Optional name for the layer. 542 /// @return - Interface for configuring the layer. 
543 IConnectableLayer* AddSplitterLayer(const ViewsDescriptor& splitterDescriptor, 544 const char* name = nullptr); 545 546 /// Adds a merge layer to the network. 547 /// @param name - Optional name for the layer. 548 /// @return - Interface for configuring the layer. 549 IConnectableLayer* AddMergeLayer(const char* name = nullptr); 550 551 /// Adds an addition layer to the network. 552 /// @param name - Optional name for the layer. 553 /// @return - Interface for configuring the layer. 554 ARMNN_DEPRECATED_MSG_REMOVAL_DATE("Use AddElementwiseBinaryLayer instead", "24.02") 555 IConnectableLayer* AddAdditionLayer(const char* name = nullptr); 556 557 /// Adds a multiplication layer to the network. 558 /// @param name - Optional name for the layer. 559 /// @return - Interface for configuring the layer. 560 ARMNN_DEPRECATED_MSG_REMOVAL_DATE("Use AddElementwiseBinaryLayer instead", "24.02") 561 IConnectableLayer* AddMultiplicationLayer(const char* name = nullptr); 562 563 /// Adds a batch normalization layer to the network. 564 /// @param mean - Pre-calculated mean for each channel. 565 /// @param variance - Pre-calculated variance for each channel. 566 /// @param beta - Per-channel additive factor. 567 /// @param gamma - Per-channel multiplicative factor. 568 /// @return - Interface for configuring the layer. 569 /// @param name - Optional name for the layer. 570 IConnectableLayer* AddBatchNormalizationLayer(const BatchNormalizationDescriptor& desc, 571 const ConstTensor& mean, 572 const ConstTensor& variance, 573 const ConstTensor& beta, 574 const ConstTensor& gamma, 575 const char* name = nullptr); 576 577 /// Adds a rank layer to the network. 578 /// @param name - Optional name for the layer. 579 /// @return - Interface for configuring the layer. 580 IConnectableLayer* AddRankLayer(const char* name = nullptr); 581 582 /// Adds a resize layer to the network. 583 /// @param resizeDescriptor - Parameters for the resize operation. 
    /// @param name - Optional name for the layer.
    /// @return - Interface for configuring the layer.
    IConnectableLayer* AddResizeLayer(const ResizeDescriptor& resizeDescriptor,
                                      const char* name = nullptr);

    /// Adds a reduce layer to the network.
    /// @param reduceDescriptor - Parameters for the reduce operation.
    /// @param name - Optional name for the layer.
    /// @return - Interface for configuring the layer.
    IConnectableLayer* AddReduceLayer(const ReduceDescriptor& reduceDescriptor,
                                      const char* name = nullptr);

    /// Adds an instance normalization layer to the network.
    /// @param desc - Parameters for the instance normalization operation.
    /// @param name - Optional name for the layer.
    /// @return - Interface for configuring the layer.
    IConnectableLayer* AddInstanceNormalizationLayer(const InstanceNormalizationDescriptor& desc,
                                                     const char* name = nullptr);

    /// Adds an L2 normalization layer to the network.
    /// Normalization is performed along dimension 1, but requires a 4d input.
    /// @param desc - Parameters for the L2 normalization operation.
    /// @param name - Optional name for the layer.
    /// @return - Interface for configuring the layer.
    IConnectableLayer* AddL2NormalizationLayer(const L2NormalizationDescriptor& desc,
                                               const char* name = nullptr);

    /// Adds a log softmax layer to the network.
    /// @param logSoftmaxDescriptor - LogSoftmaxDescriptor to configure the log softmax.
    /// @param name - Optional name for the layer.
    /// @return - Interface for configuring the layer.
    IConnectableLayer* AddLogSoftmaxLayer(const LogSoftmaxDescriptor& logSoftmaxDescriptor,
                                          const char* name = nullptr);

    /// Adds a layer with no inputs and a single output, which always corresponds to
    /// the passed in constant tensor.
    /// @param input - Tensor to be provided as the only output of the layer. The layer will maintain
    ///                its own copy of the tensor data, meaning the memory referenced by @a input can
    ///                be freed or reused after this function is called.
    /// @param name - Optional name for the layer.
    /// @return - Interface for configuring the layer.
    IConnectableLayer* AddConstantLayer(const ConstTensor& input,
                                        const char* name = nullptr);

    /// Adds a reshape layer to the network.
    /// @param reshapeDescriptor - Parameters for the reshape operation.
    /// @param name - Optional name for the layer.
    /// @return - Interface for configuring the layer.
    IConnectableLayer* AddReshapeLayer(const ReshapeDescriptor& reshapeDescriptor,
                                       const char* name = nullptr);

    /// Adds a shape layer to the network.
    /// @param name - Optional name for the layer.
    /// @return - Interface for configuring the layer.
    IConnectableLayer* AddShapeLayer(const char* name = nullptr);

    /// Adds a space to batch layer to the network.
    /// @param spaceToBatchNdDescriptor - Parameters for the space to batch operation.
    /// @param name - Optional name for the layer.
    /// @return - Interface for configuring the layer.
    IConnectableLayer* AddSpaceToBatchNdLayer(const SpaceToBatchNdDescriptor& spaceToBatchNdDescriptor,
                                              const char* name = nullptr);

    /// Adds a space to depth layer to the network.
    /// @param spaceToDepthDescriptor - Parameters for the space to depth operation.
    /// @param name - Optional name for the layer.
    /// @return - Interface for configuring the layer.
    IConnectableLayer* AddSpaceToDepthLayer(const SpaceToDepthDescriptor& spaceToDepthDescriptor,
                                            const char* name = nullptr);

    /// Adds a floor layer to the network.
    /// @param name - Optional name for the layer.
    /// @return - Interface for configuring the layer.
    IConnectableLayer* AddFloorLayer(const char* name = nullptr);

    /// Adds an output layer to the network.
660 /// @param id - User generated id to uniquely identify a particular output. The same id needs to be specified 661 /// when passing the outputs to the IRuntime::EnqueueWorkload() function. 662 /// @param name - Optional name for the layer. 663 /// @return - Interface for configuring the layer. 664 IConnectableLayer* AddOutputLayer(LayerBindingId id, const char* name = nullptr); 665 666 /// Add a Lstm layer to the network 667 /// @param descriptor - Parameters for the Lstm operation 668 /// @param params - Weights and biases for the LSTM cell 669 /// @param name - Optional name for the layer 670 /// @return - Interface for configuring the layer. 671 IConnectableLayer* AddLstmLayer(const LstmDescriptor& descriptor, 672 const LstmInputParams& params, 673 const char* name = nullptr); 674 675 /// Adds a division layer to the network. 676 /// @param name - Optional name for the layer. 677 /// @return - Interface for configuring the layer. 678 ARMNN_DEPRECATED_MSG_REMOVAL_DATE("Use AddElementwiseBinaryLayer instead", "24.02") 679 IConnectableLayer* AddDivisionLayer(const char* name = nullptr); 680 681 /// Adds a subtraction layer to the network. 682 /// @param name - Optional name for the layer. 683 /// @return - Interface for configuring the layer. 684 ARMNN_DEPRECATED_MSG_REMOVAL_DATE("Use AddElementwiseBinaryLayer instead", "24.02") 685 IConnectableLayer* AddSubtractionLayer(const char* name = nullptr); 686 687 /// Add a Maximum layer to the network. 688 /// @param name - Optional name for the layer. 689 /// @return - Interface for configuring the layer. 690 ARMNN_DEPRECATED_MSG_REMOVAL_DATE("Use AddElementwiseBinaryLayer instead", "24.02") 691 IConnectableLayer* AddMaximumLayer(const char* name = nullptr); 692 693 /// Add a Mean layer to the network. 694 /// @param meanDescriptor - Parameters for the mean operation. 695 /// @param name - Optional name for the layer. 696 /// @return - Interface for configuring the layer. 
697 IConnectableLayer* AddMeanLayer(const MeanDescriptor& meanDescriptor, const char* name = nullptr); 698 699 /// Adds a fully pad layer to the network. 700 /// @param paddings - n by 2 tensor, where n is the rank of the input tensor, 701 /// such that paddings[i,0] indicates the amount of padding to add in front of dimonsion i, and 702 /// paddings[i,1] indicates the amount of padding to add after the end of dimension i 703 /// @param name - Optional name for the layer. 704 /// @return - Interface for configuring the layer. 705 IConnectableLayer* AddPadLayer(const PadDescriptor& padDescriptor, 706 const char* name = nullptr); 707 708 /// Add a quantize layer to the network 709 ///@param name - Optional name for the layer. 710 /// @return - Interface for configuring the layer. 711 IConnectableLayer* AddQuantizeLayer(const char* name = nullptr); 712 713 /// Adds a strided slice layer to the network. 714 /// @param StridedSliceDescriptor - Parameters for the strided slice operation. 715 /// @param name - Optional name for the layer. 716 /// @return - Interface for configuring the layer. 717 IConnectableLayer* AddStridedSliceLayer(const StridedSliceDescriptor& stridedSliceDescriptor, 718 const char* name = nullptr); 719 720 /// Add a Minimum layer to the network. 721 /// @param name - Optional name for the layer. 722 /// @return - Interface for configuring the layer. 723 ARMNN_DEPRECATED_MSG_REMOVAL_DATE("Use AddElementwiseBinaryLayer instead", "24.02") 724 IConnectableLayer* AddMinimumLayer(const char* name = nullptr); 725 726 /// Add Gather layer to the network. 727 /// @param descriptor - Description of the gather layer. 728 /// @param name - Optional name for the layer. 729 /// @return - Interface for configuring the layer. 730 IConnectableLayer* AddGatherLayer(const GatherDescriptor& descriptor, 731 const char* name = nullptr); 732 733 /// Add GatherNd layer to the network. 734 /// @param name - Optional name for the layer. 
    /// @return - Interface for configuring the layer.
    IConnectableLayer* AddGatherNdLayer(const char* name = nullptr);

    /// Adds a switch layer to the network.
    /// @param name - Optional name for the layer.
    /// @return - Interface for configuring the layer.
    IConnectableLayer* AddSwitchLayer(const char* name = nullptr);

    /// Adds a PReLU layer to the network.
    /// @param name - Optional name for the layer.
    /// @return - Interface for configuring the layer.
    IConnectableLayer* AddPreluLayer(const char* name = nullptr);

    /// Adds a 2D transpose convolution layer to the network.
    /// @param descriptor - Description of the 2D transpose convolution layer.
    /// @param weights - Tensor for the weights data.
    /// @param biases - Optional tensor for the bias data.
    /// @param name - Optional name for the layer.
    /// @return - Interface for configuring the layer.
    IConnectableLayer* AddTransposeConvolution2dLayer(const TransposeConvolution2dDescriptor& descriptor,
                                                      const ConstTensor& weights,
                                                      const Optional<ConstTensor>& biases,
                                                      const char* name = nullptr);

    /// Adds a transpose layer to the network.
    /// @param transposeDescriptor - TransposeDescriptor to configure the transpose.
    /// @param name - Optional name for the layer.
    /// @return - Interface for configuring the layer.
    IConnectableLayer* AddTransposeLayer(const TransposeDescriptor& transposeDescriptor,
                                         const char* name = nullptr);

    /// Adds a stack layer to the network.
    /// @param descriptor - Description of the stack layer.
    /// @param name - Optional name for the layer.
    /// @return - Interface for configuring the layer.
    IConnectableLayer* AddStackLayer(const StackDescriptor& descriptor,
                                     const char* name = nullptr);

    /// Adds a stand-in layer for a type unknown to the Arm NN framework.
    /// Note: Due to the nature of this layer, no validation can be performed by the framework.
    /// Furthermore, any model containing this layer cannot make use of dynamic tensors since the
    /// tensor sizes cannot be inferred.
    /// @param descriptor - Descriptor for the StandIn layer.
    /// @param name - Optional name for the layer.
    /// @return - Interface for configuring the layer.
    IConnectableLayer* AddStandInLayer(const StandInDescriptor& descriptor,
                                       const char* name = nullptr);

    /// Adds a QuantizedLstm layer to the network.
    /// @param params - The weights and biases for the Quantized LSTM cell.
    /// @param name - Optional name for the layer.
    /// @return - Interface for configuring the layer.
    IConnectableLayer* AddQuantizedLstmLayer(const QuantizedLstmInputParams& params,
                                             const char* name = nullptr);

    /// Adds a QLstm layer to the network.
    /// @param descriptor - Parameters for the QLstm operation.
    /// @param params - Weights and biases for the layer.
    /// @param name - Optional name for the layer.
    /// @return - Interface for configuring the layer.
    IConnectableLayer* AddQLstmLayer(const QLstmDescriptor& descriptor,
                                     const LstmInputParams& params,
                                     const char* name = nullptr);

    /// Adds a Logical Binary layer to the network.
    /// @param descriptor - Description of the Logical Binary layer.
    /// @param name - Optional name for the layer.
    /// @return - Interface for configuring the layer.
    IConnectableLayer* AddLogicalBinaryLayer(const LogicalBinaryDescriptor& descriptor,
                                             const char* name = nullptr);

    /// Adds a UnidirectionalSequenceLstm layer to the network.
    /// @param descriptor - Parameters for the UnidirectionalSequenceLstm operation.
    /// @param params - Weights and biases for the UnidirectionalSequenceLstm.
    /// @param name - Optional name for the layer.
    /// @return - Interface for configuring the layer.
    IConnectableLayer* AddUnidirectionalSequenceLstmLayer(const UnidirectionalSequenceLstmDescriptor& descriptor,
                                                          const LstmInputParams& params,
                                                          const char* name = nullptr);

    /// Adds a ChannelShuffle layer to the network.
    /// @param descriptor - Parameters for the ChannelShuffle operation.
    /// @param name - Optional name for the layer.
    /// @return - Interface for configuring the layer.
    IConnectableLayer* AddChannelShuffleLayer(const ChannelShuffleDescriptor& descriptor,
                                              const char* name = nullptr);

    /// Adds a BatchMatMul layer to the network.
    /// @param descriptor - Parameters for the BatchMatMul operation.
    /// @param name - Optional name for the layer.
    /// @return - Interface for configuring the layer.
    IConnectableLayer* AddBatchMatMulLayer(const BatchMatMulDescriptor& descriptor,
                                           const char* name = nullptr);

    /// Applies the given IStrategy to this network. The network is not modified (const).
    void ExecuteStrategy(IStrategy& strategy) const;

protected:
    /// Protected so users cannot delete through an INetwork pointer.
    ~INetwork();

    // Friends that need access to the private implementation (pNetworkImpl).
    friend void VisitLayersTopologically(const INetwork* inputNetwork, IStrategy& strategy);
    friend class TestConnectionPreservation;
    friend TensorInfo GetInputTensorInfo(const INetwork* network);
    friend IOptimizedNetworkPtr Optimize(const INetwork& network,
                                         const std::vector<BackendId>& backendPreferences,
                                         const IDeviceSpec& deviceSpec,
                                         const OptimizerOptions& options,
                                         Optional<std::vector<std::string>&> messages);
    friend IOptimizedNetworkPtr Optimize(const INetwork& network,
                                         const std::vector<BackendId>& backendPreferences,
                                         const IDeviceSpec& deviceSpec,
                                         const OptimizerOptionsOpaque& options,
                                         Optional<std::vector<std::string>&> messages);

    INetwork(NetworkOptions networkOptions = {});

    /// Pointer-to-implementation: keeps NetworkImpl out of the public header.
    std::unique_ptr<NetworkImpl> pNetworkImpl;
};

namespace experimental
{
class AsyncNetworkImpl;
class WorkingMemHandle;
}

// Forward declarations used by IOptimizedNetwork below.
struct BackendSettings;
struct OptimizationResult;
class OptimizedNetworkImpl;
class IProfiler;
/// @brief A network that has been optimized for a set of backends.
/// Instances are produced by the armnn::Optimize() overloads declared below (which are friends of this class).
class IOptimizedNetwork
{
public:
    /// Destroys the given optimized network object.
    static void Destroy(IOptimizedNetwork* network);

    /// Prints a representation of the graph; returns a Status indicating success or failure.
    Status PrintGraph();

    /// Writes the graph to the given stream (GraphViz DOT, per the function name).
    Status SerializeToDot(std::ostream& stream) const;

    /// Unique profiling identifier for this network.
    arm::pipe::ProfilingGuid GetGuid() const;

    size_t GetNumInputs() const;
    size_t GetNumOutputs() const;

    /// Applies the given IStrategy to this network. The network is not modified (const).
    void ExecuteStrategy(IStrategy& strategy) const;

    /// Creates a copy of the IOptimizedNetwork. The IOptimizedNetwork will not be reoptimized,
    /// the provided ModelOptions will only be used when creating a LoadedNetwork.
    IOptimizedNetwork(const IOptimizedNetwork& other, const ModelOptions& modelOptions);
    IOptimizedNetwork(std::unique_ptr<Graph> graph);
    IOptimizedNetwork(std::unique_ptr<OptimizedNetworkImpl> impl);
    ~IOptimizedNetwork();

    /// Profiler shared by this network; returned by reference to avoid a shared_ptr refcount bump.
    const std::shared_ptr<IProfiler>& GetProfiler() const;

protected:
    // Friends that need access to the private implementation (pOptimizedNetworkImpl).
    friend class LoadedNetwork;

    friend class experimental::AsyncNetworkImpl;
    friend class experimental::WorkingMemHandle;

    friend Graph& GetGraphForTesting(IOptimizedNetwork* optNetPtr);
    friend ModelOptions& GetModelOptionsForTesting(IOptimizedNetwork* optNetPtr);
    friend IOptimizedNetworkPtr Optimize(const INetwork& inNetwork,
                                         const std::vector<BackendId>& backendPreferences,
                                         const IDeviceSpec& deviceSpec,
                                         const OptimizerOptionsOpaque& options,
                                         Optional<std::vector<std::string>&> messages);
    friend IOptimizedNetworkPtr Optimize(const Graph& inGraph,
                                         const std::vector<BackendId>& backendPreferences,
                                         const IDeviceSpec& deviceSpec,
                                         const OptimizerOptionsOpaque& options,
                                         Optional<std::vector<std::string>&> messages);

    IOptimizedNetwork(std::unique_ptr<Graph> graph, const ModelOptions& modelOptions);

    /// Pointer-to-implementation: keeps OptimizedNetworkImpl out of the public header.
    std::unique_ptr<OptimizedNetworkImpl> pOptimizedNetworkImpl;
};

/// Create an optimized version of the network
/// @param network INetwork description of the network to be optimized.
/// @param backendPreferences The choice of the backend ordered by user preferences.
/// @param deviceSpec DeviceSpec object as queried from the runtime. See IRuntime::GetDeviceSpec()
/// @param options OptimizerOptions object with optimizer configuration options (defaulted).
/// @param messages If there are failures or warnings a string describing same will be added to the vector
/// @return An IOptimizedNetworkPtr interface to the optimized network, throws an exception derived from
/// armnn::Exception if process fails.

IOptimizedNetworkPtr Optimize(const INetwork& network,
                              const std::vector<BackendId>& backendPreferences,
                              const IDeviceSpec& deviceSpec,
                              const OptimizerOptionsOpaque& options = OptimizerOptionsOpaque(),
                              Optional<std::vector<std::string>&> messages = EmptyOptional());

/// Create an optimized version of the network
/// @param inGraph Graph to be optimized.
/// @param backendPreferences The choice of the backend ordered by user preferences.
/// @param deviceSpec DeviceSpec object as queried from the runtime. See IRuntime::GetDeviceSpec()
/// @param options OptimizerOptions object with optimizer configuration options
/// @param messages If there are failures or warnings a string describing same will be added to the vector
/// @return An IOptimizedNetworkPtr interface to the optimized network, throws an exception derived from
/// armnn::Exception if process fails.

IOptimizedNetworkPtr Optimize(const Graph& inGraph,
                              const std::vector<BackendId>& backendPreferences,
                              const IDeviceSpec& deviceSpec,
                              const OptimizerOptionsOpaque& options,
                              Optional<std::vector<std::string>&> messages = EmptyOptional());

/// Accept legacy OptimizerOptions (pre-OptimizerOptionsOpaque overload, kept for backward compatibility).
IOptimizedNetworkPtr Optimize(const Graph& inGraph,
                              const std::vector<BackendId>& backendPreferences,
                              const IDeviceSpec& deviceSpec,
                              const OptimizerOptions& options,
                              Optional<std::vector<std::string>&> messages = EmptyOptional());

/// Accept legacy OptimizerOptions (pre-OptimizerOptionsOpaque overload, kept for backward compatibility).
IOptimizedNetworkPtr Optimize(const INetwork& network,
                              const std::vector<BackendId>& backendPreferences,
                              const IDeviceSpec& deviceSpec,
                              const OptimizerOptions& options,
                              Optional<std::vector<std::string>&> messages = EmptyOptional());

} //namespace armnn