//
// Copyright © 2018-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once

#include <array>
#include <functional>
#include <memory>
#include <stdint.h>
#include <chrono>
#include "BackendId.hpp"
#include "Exceptions.hpp"
#include "Deprecated.hpp"

namespace arm
{
namespace pipe
{

class ProfilingGuid;

} // namespace pipe
} // namespace arm

/// Define LayerGuid type.
using LayerGuid = arm::pipe::ProfilingGuid;

namespace armnn
{

constexpr unsigned int MaxNumOfTensorDimensions = 5U;

/// The lowest performance data capture interval we support is 10 milliseconds.
constexpr unsigned int LOWEST_CAPTURE_PERIOD = 10000u;

/// Controls the expiry rate of the priority queue.
constexpr unsigned int EXPIRE_RATE = 3U;

/// @enum Status enumeration
/// @var Status::Success
/// @var Status::Failure
enum class Status
{
    Success = 0,
    Failure = 1
};

enum class DataType
{
    Float16  = 0,
    Float32  = 1,
    QAsymmU8 = 2,
    Signed32 = 3,
    Boolean  = 4,
    QSymmS16 = 5,
    QSymmS8  = 6,
    QAsymmS8 = 7,
    BFloat16 = 8,
    Signed64 = 9,
};

enum class DataLayout
{
    NCHW  = 1,
    NHWC  = 2,
    NDHWC = 3,
    NCDHW = 4
};

/// Define the behaviour of the internal profiler when outputting network details
enum class ProfilingDetailsMethod
{
    Undefined         = 0,
    DetailsWithEvents = 1,
    DetailsOnly       = 2
};


enum class QosExecPriority
{
    Low    = 0,
    Medium = 1,
    High   = 2
};

enum class ActivationFunction
{
    Sigmoid     = 0,
    TanH        = 1,
    Linear      = 2,
    ReLu        = 3,
    BoundedReLu = 4, ///< min(a, max(b, input)); covers ReLu1 and ReLu6.
    SoftReLu    = 5,
    LeakyReLu   = 6,
    Abs         = 7,
    Sqrt        = 8,
    Square      = 9,
    Elu         = 10,
    HardSwish   = 11
};

enum class ArgMinMaxFunction
{
    Min = 0,
    Max = 1
};

enum class ComparisonOperation
{
    Equal          = 0,
    Greater        = 1,
    GreaterOrEqual = 2,
    Less           = 3,
    LessOrEqual    = 4,
    NotEqual       = 5
};

enum class LogicalBinaryOperation
{
    LogicalAnd = 0,
    LogicalOr  = 1
};

enum class UnaryOperation
{
    Abs        = 0,
    Exp        = 1,
    Sqrt       = 2,
    Rsqrt      = 3,
    Neg        = 4,
    LogicalNot = 5,
    Log        = 6,
    Sin        = 7,
    Ceil       = 8
};

enum class BinaryOperation
{
    Add     = 0,
    Div     = 1,
    Maximum = 2,
    Minimum = 3,
    Mul     = 4,
    Sub     = 5
};

enum class PoolingAlgorithm
{
    Max     = 0,
    Average = 1,
    L2      = 2
};

enum class ReduceOperation
{
    Sum  = 0,
    Max  = 1,
    Mean = 2,
    Min  = 3,
    Prod = 4
};

enum class ResizeMethod
{
    Bilinear        = 0,
    NearestNeighbor = 1
};

enum class Dimensionality
{
    NotSpecified = 0,
    Specified    = 1,
    Scalar       = 2
};

///
/// The padding method modifies the output of pooling layers.
/// In both supported methods, the padded values are ignored (they are
/// not even zeroes, which would make a difference for max pooling
/// a tensor with negative values). The difference between
/// IgnoreValue and Exclude is that the former counts the padding
/// fields in the divisor of Average and L2 pooling, while
/// Exclude does not.
///
enum class PaddingMethod
{
    /// The padding fields count, but are ignored
    IgnoreValue = 0,
    /// The padding fields don't count and are ignored
    Exclude     = 1
};
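
// Worked example (illustrative only, not part of the Arm NN API): a 2x2 average-pooling
// window placed over the corner of a padded tensor so that it covers one real element
// with value 8.0f and three padding fields.
//
//     float ignoreValueResult = 8.0f / 4.0f; // IgnoreValue: padding fields count in the divisor -> 2.0f
//     float excludeResult     = 8.0f / 1.0f; // Exclude: padding fields left out of the divisor  -> 8.0f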

///
/// The padding mode controls whether the padding should be filled with constant values (Constant), or
/// reflect the input, either including the border values (Symmetric) or not (Reflect).
///
enum class PaddingMode
{
    Constant  = 0,
    Reflect   = 1,
    Symmetric = 2
};

enum class NormalizationAlgorithmChannel
{
    Across = 0,
    Within = 1
};

enum class NormalizationAlgorithmMethod
{
    /// Krizhevsky 2012: Local Brightness Normalization
    LocalBrightness = 0,
    /// Jarrett 2009: Local Contrast Normalization
    LocalContrast = 1
};

enum class OutputShapeRounding
{
    Floor   = 0,
    Ceiling = 1
};

///
/// The ShapeInferenceMethod modifies how the output shapes are treated.
/// When ValidateOnly is selected, the output shapes are inferred from the input parameters of the layer
/// and any mismatch is reported.
/// When InferAndValidate is selected, two actions are performed: (1) infer the output shape from the inputs and
/// (2) validate the shapes as in ValidateOnly. This option has been added to work with tensors whose rank or
/// dimension sizes are not specified explicitly but can be calculated from the inputs.
///
enum class ShapeInferenceMethod
{
    /// Validate all output shapes
    ValidateOnly     = 0,
    /// Infer missing output shapes and validate all output shapes
    InferAndValidate = 1
};

/// Define the Memory Source to reduce copies
enum class MemorySource : uint32_t
{
    Undefined       = 0,
    Malloc          = 1,
    DmaBuf          = 2,
    DmaBufProtected = 4,
    Gralloc         = 8
};

enum class MemBlockStrategyType
{
    // MemBlocks can be packed on the Y axis only; overlap is allowed on the X axis.
    // In other words, MemBlocks with overlapping lifetimes cannot use the same MemBin,
    // equivalent to blob or pooling memory management.
    SingleAxisPacking = 0,

    // MemBlocks can be packed on either the Y or the X axis but cannot overlap on both.
    // In other words, MemBlocks with overlapping lifetimes can use the same MemBin,
    // equivalent to offset or slab memory management.
    MultiAxisPacking = 1
};

/// Each backend should implement an IBackend.
class IBackend
{
protected:
    IBackend() {}
    virtual ~IBackend() {}

public:
    virtual const BackendId& GetId() const = 0;
};

using IBackendSharedPtr = std::shared_ptr<IBackend>;
using IBackendUniquePtr = std::unique_ptr<IBackend, void(*)(IBackend* backend)>;
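
// Illustrative sketch only (MyBackend and CreateBackend are hypothetical, not part of Arm NN):
// the function-pointer deleter on IBackendUniquePtr lets the code that created a backend
// (for example a dynamically loaded backend library) also be the code that destroys it.
// A capture-less lambda converts to the required void(*)(IBackend*) deleter type:
//
//     armnn::IBackendUniquePtr CreateBackend()
//     {
//         return armnn::IBackendUniquePtr(new MyBackend(),
//                                         [](armnn::IBackend* backend) { delete backend; });
//     }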

/// BackendCapability enumeration
enum class BackendCapability : uint32_t
{
    /// Constant weights can be accessed through the descriptors;
    /// non-const weights, on the other hand, can be accessed through inputs.
    NonConstWeights,

    /// Asynchronous Execution.
    AsyncExecution,

    // add new enum values here
};

/// Device specific knowledge to be passed to the optimizer.
class IDeviceSpec
{
protected:
    IDeviceSpec() {}
    virtual ~IDeviceSpec() {}
public:
    virtual const BackendIdSet& GetSupportedBackends() const = 0;
};

/// Type of identifiers for bindable layers (inputs, outputs).
using LayerBindingId = int;
using ImportedInputId = unsigned int;
using ImportedOutputId = unsigned int;


class PermutationVector
{
public:
    using ValueType = unsigned int;
    using SizeType = unsigned int;
    using ArrayType = std::array<ValueType, MaxNumOfTensorDimensions>;
    using ConstIterator = typename ArrayType::const_iterator;

    /// @param dimMappings - Indicates how to translate tensor elements from a given source into the target destination,
    /// when source and target potentially have different memory layouts.
    ///
    /// E.g. For a 4-d tensor laid out in a memory with the format (Batch Element, Height, Width, Channels),
    /// which is to be passed as an input to ArmNN, each source dimension is mapped to the corresponding
    /// ArmNN dimension. The Batch dimension remains the same (0 -> 0). The source Height dimension is mapped
    /// to the location of the ArmNN Height dimension (1 -> 2). Similar arguments are made for the Width and
    /// Channels (2 -> 3 and 3 -> 1). This will lead to @ref m_DimMappings pointing to the following array:
    /// [ 0, 2, 3, 1 ].
    ///
    /// Note that the mapping should be reversed if considering the case of ArmNN 4-d outputs (Batch Element,
    /// Channels, Height, Width) being written to a destination with the format mentioned above. We now have
    /// 0 -> 0, 2 -> 1, 3 -> 2, 1 -> 3, which, when reordered, leads to the following @ref m_DimMappings contents:
    /// [ 0, 3, 1, 2 ].
    ///
    PermutationVector(const ValueType *dimMappings, SizeType numDimMappings);

    PermutationVector(std::initializer_list<ValueType> dimMappings);

    ///
    /// Indexing method with out-of-bounds error checking for the m_DimMappings array.
    /// @param i - integer value corresponding to the index of the m_DimMappings array to retrieve the element from.
    /// @return element at index i of the m_DimMappings array.
    /// @throws InvalidArgumentException when indexing an out-of-bounds index of the m_DimMappings array.
    ///
    ValueType operator[](SizeType i) const
    {
        if (i >= GetSize())
        {
            throw InvalidArgumentException("Invalid indexing of PermutationVector of size " + std::to_string(GetSize())
                                           + " at location [" + std::to_string(i) + "].");
        }
        return m_DimMappings.at(i);
    }

    SizeType GetSize() const { return m_NumDimMappings; }

    ConstIterator begin() const { return m_DimMappings.begin(); }
    /**
     *
     * @return an iterator pointing one past the last valid mapping (i.e. begin() + m_NumDimMappings),
     *         not one past the end of the underlying m_DimMappings array.
     */
    ConstIterator end() const { return m_DimMappings.begin() + m_NumDimMappings; }

    bool IsEqual(const PermutationVector& other) const
    {
        if (m_NumDimMappings != other.m_NumDimMappings) return false;
        for (unsigned int i = 0; i < m_NumDimMappings; ++i)
        {
            if (m_DimMappings[i] != other.m_DimMappings[i]) return false;
        }
        return true;
    }

    bool IsInverse(const PermutationVector& other) const
    {
        bool isInverse = (GetSize() == other.GetSize());
        for (SizeType i = 0; isInverse && (i < GetSize()); ++i)
        {
            isInverse = (m_DimMappings[other.m_DimMappings[i]] == i);
        }
        return isInverse;
    }

private:
    ArrayType m_DimMappings;
    /// Number of valid entries in @ref m_DimMappings
    SizeType m_NumDimMappings;
};
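
// Example (illustrative only): building the mapping described in the comments above,
// which translates a source tensor laid out as (Batch, Height, Width, Channels) into
// Arm NN's (Batch, Channels, Height, Width) ordering.
//
//     armnn::PermutationVector nhwcToNchw({ 0, 2, 3, 1 });
//     // nhwcToNchw.GetSize() == 4
//     // nhwcToNchw[1] == 2   (the source Height dimension maps to destination index 2)
//     // nhwcToNchw.IsInverse(armnn::PermutationVector({ 0, 3, 1, 2 })) == true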

class ITensorHandle;

/// Define the type of callback for the Debug layer to call
/// @param guid - guid of layer connected to the input of the Debug layer
/// @param slotIndex - index of the output slot connected to the input of the Debug layer
/// @param tensorHandle - TensorHandle for the input tensor to the Debug layer
using DebugCallbackFunction = std::function<void(LayerGuid guid, unsigned int slotIndex, ITensorHandle* tensorHandle)>;

/// Define a timer and associated inference ID for recording execution times
using HighResolutionClock = std::chrono::high_resolution_clock::time_point;
using InferenceTimingPair = std::pair<HighResolutionClock, HighResolutionClock>;


/// This list uses the X macro technique.
/// See https://en.wikipedia.org/wiki/X_Macro for more info
// New layers should be added at last position to minimize instability.
#define LIST_OF_LAYER_TYPE \
    X(Activation) \
    X(Addition) \
    X(ArgMinMax) \
    X(BatchNormalization) \
    X(BatchToSpaceNd) \
    X(Comparison) \
    X(Concat) \
    X(Constant) \
    X(ConvertFp16ToFp32) \
    X(ConvertFp32ToFp16) \
    X(Convolution2d) \
    X(Debug) \
    X(DepthToSpace) \
    X(DepthwiseConvolution2d) \
    X(Dequantize) \
    X(DetectionPostProcess) \
    X(Division) \
    X(ElementwiseUnary) \
    X(FakeQuantization) \
    X(Fill) \
    X(Floor) \
    X(FullyConnected) \
    X(Gather) \
    X(Input) \
    X(InstanceNormalization) \
    X(L2Normalization) \
    X(LogicalBinary) \
    X(LogSoftmax) \
    X(Lstm) \
    X(QLstm) \
    X(Map) \
    X(Maximum) \
    X(Mean) \
    X(MemCopy) \
    X(MemImport) \
    X(Merge) \
    X(Minimum) \
    X(Multiplication) \
    X(Normalization) \
    X(Output) \
    X(Pad) \
    X(Permute) \
    X(Pooling2d) \
    X(PreCompiled) \
    X(Prelu) \
    X(Quantize) \
    X(QuantizedLstm) \
    X(Reshape) \
    X(Rank) \
    X(Resize) \
    X(Reduce) \
    X(Slice) \
    X(Softmax) \
    X(SpaceToBatchNd) \
    X(SpaceToDepth) \
    X(Splitter) \
    X(Stack) \
    X(StandIn) \
    X(StridedSlice) \
    X(Subtraction) \
    X(Switch) \
    X(Transpose) \
    X(TransposeConvolution2d) \
    X(Unmap) \
    X(Cast) \
    X(Shape) \
    X(UnidirectionalSequenceLstm) \
    X(ChannelShuffle) \
    X(Convolution3d) \
    X(Pooling3d) \
    X(GatherNd) \
    X(BatchMatMul) \
    X(ElementwiseBinary) \

// New layers should be added at last position to minimize instability.
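
// The X macro list above can be expanded with different definitions of X to generate parallel
// constructs from a single source of truth. Illustrative sketch only (the real implementation
// of GetLayerTypeAsCString lives elsewhere in Arm NN, and LayerTypeToCString here is a
// hypothetical name): one possible expansion that maps each LayerType value, declared below,
// to its name.
//
//     const char* LayerTypeToCString(armnn::LayerType type)
//     {
//         switch (type)
//         {
//     #define X(name) case armnn::LayerType::name: return #name;
//             LIST_OF_LAYER_TYPE
//     #undef X
//             default: return "Unknown";
//         }
//     }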

/// When adding a new layer, also adapt the LastLayer enum value in the
/// enum class LayerType below.
enum class LayerType
{
#define X(name) name,
    LIST_OF_LAYER_TYPE
#undef X
    FirstLayer = Activation,
    LastLayer = ElementwiseBinary
};

const char* GetLayerTypeAsCString(LayerType type);

} // namespace armnn