1 // 2 // Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved. 3 // SPDX-License-Identifier: MIT 4 // 5 #pragma once 6 7 #include <armnn/DescriptorsFwd.hpp> 8 #include <armnn/INetwork.hpp> 9 #include <armnn/LstmParams.hpp> 10 #include <armnn/QuantizedLstmParams.hpp> 11 #include <armnn/TensorFwd.hpp> 12 #include <armnn/Types.hpp> 13 14 #include <Graph.hpp> 15 #include <Layer.hpp> 16 #include <OptimizedNetworkImpl.hpp> 17 #include <armnn/backends/SubgraphView.hpp> 18 19 #include <string> 20 #include <vector> 21 #include <map> 22 #include <memory> 23 24 namespace armnn 25 { 26 27 class Graph; 28 29 using NetworkImplPtr = std::unique_ptr<NetworkImpl, void (*)(NetworkImpl* network)>; 30 31 /// Private implementation of INetwork. 32 class NetworkImpl 33 { 34 public: 35 NetworkImpl(const NetworkOptions& networkOptions = {}); 36 ~NetworkImpl(); 37 GetGraph() const38 const Graph& GetGraph() const 39 { return *m_Graph; } 40 41 Status PrintGraph(); 42 43 IConnectableLayer* AddInputLayer(LayerBindingId id, const char* name = nullptr); 44 45 IConnectableLayer* AddActivationLayer(const ActivationDescriptor& activationDescriptor, 46 const char* name = nullptr); 47 ARMNN_DEPRECATED_MSG_REMOVAL_DATE("Use AddElementwiseBinaryLayer instead", "24.02") 48 IConnectableLayer* AddAdditionLayer(const char* name = nullptr); 49 50 IConnectableLayer* AddArgMinMaxLayer(const ArgMinMaxDescriptor& desc, 51 const char* name = nullptr); 52 53 IConnectableLayer* AddBatchMatMulLayer(const BatchMatMulDescriptor& desc, 54 const char* name = nullptr); 55 56 IConnectableLayer* AddBatchNormalizationLayer(const BatchNormalizationDescriptor& desc, 57 const ConstTensor& mean, 58 const ConstTensor& variance, 59 const ConstTensor& beta, 60 const ConstTensor& gamma, 61 const char* name = nullptr); 62 63 IConnectableLayer* AddBatchToSpaceNdLayer(const BatchToSpaceNdDescriptor& batchToSpaceNdDescriptor, 64 const char* name = nullptr); 65 66 IConnectableLayer* AddCastLayer(const char* name = nullptr); 67 68 IConnectableLayer* AddChannelShuffleLayer(const ChannelShuffleDescriptor& channelShuffleDescriptor, 69 const char* name = nullptr); 70 71 IConnectableLayer* AddComparisonLayer(const ComparisonDescriptor& comparisonDescriptor, 72 const char* name = nullptr); 73 74 IConnectableLayer* AddConcatLayer(const ConcatDescriptor& concatDescriptor, 75 const char* name = nullptr); 76 77 IConnectableLayer* AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor, 78 const char* name = nullptr); 79 80 IConnectableLayer* AddConvolution3dLayer(const Convolution3dDescriptor& convolution3dDescriptor, 81 const char* name = nullptr); 82 83 IConnectableLayer* AddConstantLayer(const ConstTensor& input, const char* name = nullptr); 84 85 IConnectableLayer* AddDepthToSpaceLayer(const DepthToSpaceDescriptor& depthToSpaceDescriptor, 86 const char* name = nullptr); 87 88 IConnectableLayer* AddDepthwiseConvolution2dLayer(const DepthwiseConvolution2dDescriptor& convolution2dDescriptor, 89 const char* name = nullptr); 90 91 IConnectableLayer* AddDequantizeLayer(const char* name = nullptr); 92 93 IConnectableLayer* AddDetectionPostProcessLayer(const DetectionPostProcessDescriptor& descriptor, 94 const ConstTensor& anchors, 95 const char* name = nullptr); 96 97 ARMNN_DEPRECATED_MSG_REMOVAL_DATE("Use AddElementwiseBinaryLayer instead", "24.02") 98 IConnectableLayer* AddDivisionLayer(const char* name = nullptr); 99 100 IConnectableLayer* AddElementwiseBinaryLayer(const ElementwiseBinaryDescriptor& elementwiseBinaryDescriptor, 101 const char* name = nullptr); 102 103 IConnectableLayer* AddElementwiseUnaryLayer(const ElementwiseUnaryDescriptor& elementwiseUnaryDescriptor, 104 const char* name = nullptr); 105 106 IConnectableLayer* AddMergeLayer(const char* name = nullptr); 107 108 IConnectableLayer* AddFillLayer(const FillDescriptor& fillDescriptor, 109 const char* name = nullptr); 110 111 IConnectableLayer* AddFloorLayer(const char* name = nullptr); 112 113 IConnectableLayer* AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor, 114 const char* name = nullptr); 115 116 IConnectableLayer* AddGatherLayer(const GatherDescriptor& gatherDescriptor, 117 const char* name = nullptr); 118 119 IConnectableLayer* AddGatherNdLayer(const char* name = nullptr); 120 121 IConnectableLayer* AddInstanceNormalizationLayer(const InstanceNormalizationDescriptor& desc, 122 const char* name = nullptr); 123 124 IConnectableLayer* AddL2NormalizationLayer(const L2NormalizationDescriptor& desc, 125 const char* name = nullptr); 126 127 IConnectableLayer* AddLogSoftmaxLayer(const LogSoftmaxDescriptor& logSoftmaxDescriptor, 128 const char* name = nullptr); 129 130 IConnectableLayer* AddLogicalBinaryLayer(const LogicalBinaryDescriptor& logicalBinaryDescriptor, 131 const char* name = nullptr); 132 133 IConnectableLayer* AddLstmLayer(const LstmDescriptor& descriptor, 134 const LstmInputParams& params, 135 const char* name = nullptr); 136 137 ARMNN_DEPRECATED_MSG_REMOVAL_DATE("Use AddElementwiseBinaryLayer instead", "24.02") 138 IConnectableLayer* AddMaximumLayer(const char* name = nullptr); 139 140 IConnectableLayer* AddMeanLayer(const MeanDescriptor& meanDescriptor, const char* name = nullptr); 141 142 ARMNN_DEPRECATED_MSG_REMOVAL_DATE("Use AddElementwiseBinaryLayer instead", "24.02") 143 IConnectableLayer* AddMinimumLayer(const char* name = nullptr); 144 145 ARMNN_DEPRECATED_MSG_REMOVAL_DATE("Use AddElementwiseBinaryLayer instead", "24.02") 146 IConnectableLayer* AddMultiplicationLayer(const char* name = nullptr); 147 148 IConnectableLayer* AddNormalizationLayer(const NormalizationDescriptor& normalizationDescriptor, 149 const char* name = nullptr); 150 151 IConnectableLayer* AddOutputLayer(LayerBindingId id, const char* name = nullptr); 152 153 IConnectableLayer* AddPadLayer(const PadDescriptor& padDescriptor, const char* name = nullptr); 154 155 IConnectableLayer* AddPermuteLayer(const PermuteDescriptor& permuteDescriptor, 156 const char* name = nullptr); 157 158 IConnectableLayer* AddPooling2dLayer(const Pooling2dDescriptor& pooling2dDescriptor, 159 const char* name = nullptr); 160 161 IConnectableLayer* AddPooling3dLayer(const Pooling3dDescriptor& pooling3dDescriptor, 162 const char* name = nullptr); 163 164 IConnectableLayer* AddPrecompiledLayer(const PreCompiledDescriptor& preCompiledDescriptor, 165 CompiledBlobPtr compiledBlobPtr, 166 const Optional<BackendId>& backend, 167 const char* name = nullptr); 168 169 IConnectableLayer* AddPreluLayer(const char* name = nullptr); 170 171 IConnectableLayer* AddQuantizeLayer(const char* name = nullptr); 172 173 IConnectableLayer* AddQLstmLayer(const QLstmDescriptor& descriptor, 174 const LstmInputParams& params, 175 const char* name = nullptr); 176 177 IConnectableLayer* AddQuantizedLstmLayer(const QuantizedLstmInputParams& params, 178 const char* name = nullptr); 179 180 IConnectableLayer* AddRankLayer(const char* name = nullptr); 181 182 IConnectableLayer* AddReduceLayer(const ReduceDescriptor& reduceDescriptor, 183 const char* name = nullptr); 184 185 IConnectableLayer* AddResizeLayer(const ResizeDescriptor& resizeDescriptor, 186 const char* name = nullptr); 187 188 IConnectableLayer* AddReshapeLayer(const ReshapeDescriptor& reshapeDescriptor, 189 const char* name = nullptr); 190 191 IConnectableLayer* AddShapeLayer(const char* name = nullptr); 192 193 IConnectableLayer* AddSliceLayer(const SliceDescriptor& sliceDescriptor, const char* name = nullptr); 194 195 IConnectableLayer* AddSoftmaxLayer(const SoftmaxDescriptor& softmaxDescriptor, 196 const char* name = nullptr); 197 198 IConnectableLayer* AddSplitterLayer(const ViewsDescriptor& splitterDescriptor, 199 const char* name = nullptr); 200 201 IConnectableLayer* AddSpaceToBatchNdLayer(const SpaceToBatchNdDescriptor& spaceToBatchNdDescriptor, 202 const char* name = nullptr); 203 204 IConnectableLayer* AddSpaceToDepthLayer(const SpaceToDepthDescriptor& spaceToDepthDescriptor, 205 const char* name = nullptr); 206 207 IConnectableLayer* AddStackLayer(const StackDescriptor& stackDescriptor, 208 const char* name = nullptr); 209 210 IConnectableLayer* AddStandInLayer(const StandInDescriptor& descriptor, 211 const char* name = nullptr); 212 213 IConnectableLayer* AddStridedSliceLayer(const StridedSliceDescriptor& stridedSliceDescriptor, 214 const char* name = nullptr); 215 216 ARMNN_DEPRECATED_MSG_REMOVAL_DATE("Use AddElementwiseBinaryLayer instead", "24.02") 217 IConnectableLayer* AddSubtractionLayer(const char* name = nullptr); 218 219 IConnectableLayer* AddSwitchLayer(const char* name = nullptr); 220 221 IConnectableLayer* AddTransposeConvolution2dLayer(const TransposeConvolution2dDescriptor& descriptor, 222 const ConstTensor& weights, 223 const Optional<ConstTensor>& biases, 224 const char* name = nullptr); 225 226 IConnectableLayer* AddTransposeLayer(const TransposeDescriptor& transposeDescriptor, 227 const char* name = nullptr); 228 229 IConnectableLayer* AddUnidirectionalSequenceLstmLayer(const UnidirectionalSequenceLstmDescriptor& descriptor, 230 const LstmInputParams& params, 231 const char* name = nullptr); 232 233 IConnectableLayer* AddConvertFp16ToFp32Layer(const char* name = nullptr); 234 235 IConnectableLayer* AddConvertFp32ToFp16Layer(const char* name = nullptr); 236 237 void ExecuteStrategy(IStrategy& strategy) const; 238 239 private: 240 241 bool GetShapeInferenceMethod(); 242 bool GetAllowExpandedDims(); 243 NetworkOptions m_NetworkOptions; 244 245 std::unique_ptr<Graph> m_Graph; 246 ModelOptions m_ModelOptions; 247 }; 248 249 struct OptimizationResult 250 { 251 bool m_Warning; 252 bool m_Error; 253 OptimizationResultarmnn::OptimizationResult254 OptimizationResult(bool warning, bool error) 255 : m_Warning(warning), m_Error(error) 256 {} 257 OptimizationResultarmnn::OptimizationResult258 OptimizationResult() 259 : OptimizationResult(false, false) 260 {} 261 IsOkarmnn::OptimizationResult262 bool IsOk() const 263 { return !m_Warning && !m_Error; } IsWarningOnlyarmnn::OptimizationResult264 bool IsWarningOnly() const 265 { return m_Warning && !m_Error; } IsErrorarmnn::OptimizationResult266 bool IsError() const 267 { return m_Error; } 268 269 }; 270 271 using BackendsMap = std::map<BackendId, std::unique_ptr<class IBackendInternal>>; 272 273 BackendsMap CreateSupportedBackends(TensorHandleFactoryRegistry& handleFactoryRegistry, 274 struct BackendSettings& backendSettings); 275 276 OptimizationResult SelectTensorHandleStrategy(Graph& optGraph, 277 BackendsMap& backends, 278 TensorHandleFactoryRegistry& registry, 279 bool importEnabled, 280 bool exportEnabled, 281 Optional<std::vector<std::string>&> errMessages); 282 283 OptimizationResult AssignBackends(OptimizedNetworkImpl* optNetObjPtr, 284 BackendSettings& backendSettings, 285 Graph::Iterator& firstLayer, 286 Graph::Iterator& lastLayer, 287 Optional<std::vector<std::string>&> errMessages); 288 289 290 OptimizationResult AssignBackends(OptimizedNetworkImpl* optNetObjPtr, 291 BackendSettings& backendSettings, 292 SubgraphView::IConnectableLayerIterator& firstLayer, 293 SubgraphView::IConnectableLayerIterator& lastLayer, 294 Optional<std::vector<std::string>&> errMessages); 295 296 struct OptimizerOptionsOpaqueImpl 297 { 298 ~OptimizerOptionsOpaqueImpl() = default; 299 OptimizerOptionsOpaqueImplarmnn::OptimizerOptionsOpaqueImpl300 explicit OptimizerOptionsOpaqueImpl() 301 : m_ReduceFp32ToFp16(false) 302 , m_Debug(false) 303 , m_DebugToFile(false) 304 , m_ReduceFp32ToBf16(false) 305 , m_shapeInferenceMethod(armnn::ShapeInferenceMethod::ValidateOnly) 306 , m_ImportEnabled(false) 307 , m_ModelOptions() 308 , m_ProfilingEnabled(false) 309 , m_ExportEnabled(false) 310 , m_AllowExpandedDims(false) 311 { 312 } 313 OptimizerOptionsOpaqueImplarmnn::OptimizerOptionsOpaqueImpl314 explicit OptimizerOptionsOpaqueImpl(bool reduceFp32ToFp16, bool debug, bool reduceFp32ToBf16, 315 bool importEnabled, ModelOptions modelOptions = {}, 316 bool exportEnabled = false, bool debugToFile = false) 317 : m_ReduceFp32ToFp16(reduceFp32ToFp16) 318 , m_Debug(debug) 319 , m_DebugToFile(debugToFile) 320 , m_ReduceFp32ToBf16(reduceFp32ToBf16) 321 , m_shapeInferenceMethod(armnn::ShapeInferenceMethod::ValidateOnly) 322 , m_ImportEnabled(importEnabled) 323 , m_ModelOptions(modelOptions) 324 , m_ProfilingEnabled(false) 325 , m_ExportEnabled(exportEnabled) 326 , m_AllowExpandedDims(false) 327 { 328 } 329 OptimizerOptionsOpaqueImplarmnn::OptimizerOptionsOpaqueImpl330 explicit OptimizerOptionsOpaqueImpl(bool reduceFp32ToFp16, bool debug, bool reduceFp32ToBf16, 331 ShapeInferenceMethod shapeInferenceMethod, 332 bool importEnabled, ModelOptions modelOptions, bool exportEnabled, 333 bool debugToFile, bool allowExpandedDims) 334 : m_ReduceFp32ToFp16(reduceFp32ToFp16) 335 , m_Debug(debug) 336 , m_DebugToFile(debugToFile) 337 , m_ReduceFp32ToBf16(reduceFp32ToBf16) 338 , m_shapeInferenceMethod(shapeInferenceMethod) 339 , m_ImportEnabled(importEnabled) 340 , m_ModelOptions(modelOptions) 341 , m_ProfilingEnabled(false) 342 , m_ExportEnabled(exportEnabled) 343 , m_AllowExpandedDims(allowExpandedDims) 344 { 345 } 346 347 /// Reduces all Fp32 operators in the model to Fp16 for faster processing. 348 /// @Note This feature works best if all operators of the model are in Fp32. ArmNN will add conversion layers 349 /// between layers that weren't in Fp32 in the first place or if the operator is not supported in Fp16. 350 /// The overhead of these conversions can lead to a slower overall performance if too many conversions are 351 /// required. 352 bool m_ReduceFp32ToFp16 = false; 353 354 /// Add debug data for easier troubleshooting 355 bool m_Debug = false; 356 357 /// Pass debug data to separate output files for easier troubleshooting 358 bool m_DebugToFile = false; 359 360 /// @Note This feature has been replaced by enabling Fast Math in compute library backend options. 361 /// This is currently a placeholder option 362 bool m_ReduceFp32ToBf16 = false; 363 364 /// Infer output size when not available 365 ShapeInferenceMethod m_shapeInferenceMethod = armnn::ShapeInferenceMethod::ValidateOnly; 366 367 /// Enable Import 368 bool m_ImportEnabled = false; 369 370 /// Enable Model Options 371 ModelOptions m_ModelOptions; 372 373 /// Enable profiling dump of the optimizer phase 374 bool m_ProfilingEnabled = false; 375 376 /// Enable Export 377 bool m_ExportEnabled = false; 378 379 /// When calculating tensor sizes, dimensions of size == 1 will be ignored 380 bool m_AllowExpandedDims = false; 381 }; 382 383 } // namespace armnn 384