1 /* 2 * Copyright (c) 2016-2020 Arm Limited. 3 * 4 * SPDX-License-Identifier: MIT 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to 8 * deal in the Software without restriction, including without limitation the 9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 10 * sell copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in all 14 * copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 
23 */ 24 #ifndef ARM_COMPUTE_TYPES_H 25 #define ARM_COMPUTE_TYPES_H 26 27 #include "arm_compute/core/Coordinates.h" 28 #include "arm_compute/core/QuantizationInfo.h" 29 #include "arm_compute/core/Size2D.h" 30 #include "arm_compute/core/Strides.h" 31 #include "arm_compute/core/TensorShape.h" 32 #include "arm_compute/core/utils/misc/Macros.h" 33 #include "support/Bfloat16.h" 34 #include "support/Half.h" 35 36 #include <cmath> 37 #include <cstddef> 38 #include <cstdint> 39 #include <map> 40 #include <string> 41 #include <utility> 42 43 namespace arm_compute 44 { 45 /** 16-bit floating point type */ 46 using half = half_float::half; 47 48 /** Permutation vector */ 49 using PermutationVector = Strides; 50 /** Bidirectional strides */ 51 using BiStrides = Coordinates; 52 53 /** Image colour formats */ 54 enum class Format 55 { 56 UNKNOWN, /**< Unknown image format */ 57 U8, /**< 1 channel, 1 U8 per channel */ 58 S16, /**< 1 channel, 1 S16 per channel */ 59 U16, /**< 1 channel, 1 U16 per channel */ 60 S32, /**< 1 channel, 1 S32 per channel */ 61 U32, /**< 1 channel, 1 U32 per channel */ 62 BFLOAT16, /**< 16-bit brain floating-point number */ 63 F16, /**< 1 channel, 1 F16 per channel */ 64 F32, /**< 1 channel, 1 F32 per channel */ 65 UV88, /**< 2 channel, 1 U8 per channel */ 66 RGB888, /**< 3 channels, 1 U8 per channel */ 67 RGBA8888, /**< 4 channels, 1 U8 per channel */ 68 YUV444, /**< A 3 plane of 8 bit 4:4:4 sampled Y, U, V planes */ 69 YUYV422, /**< A single plane of 32-bit macro pixel of Y0, U0, Y1, V0 bytes */ 70 NV12, /**< A 2 plane YUV format of Luma (Y) and interleaved UV data at 4:2:0 sampling */ 71 NV21, /**< A 2 plane YUV format of Luma (Y) and interleaved VU data at 4:2:0 sampling */ 72 IYUV, /**< A 3 plane of 8-bit 4:2:0 sampled Y, U, V planes */ 73 UYVY422 /**< A single plane of 32-bit macro pixel of U0, Y0, V0, Y1 byte */ 74 }; 75 76 /** Available data types */ 77 enum class DataType 78 { 79 UNKNOWN, /**< Unknown data type */ 80 U8, /**< unsigned 8-bit 
number */ 81 S8, /**< signed 8-bit number */ 82 QSYMM8, /**< quantized, symmetric fixed-point 8-bit number */ 83 QASYMM8, /**< quantized, asymmetric fixed-point 8-bit number unsigned */ 84 QASYMM8_SIGNED, /**< quantized, asymmetric fixed-point 8-bit number signed */ 85 QSYMM8_PER_CHANNEL, /**< quantized, symmetric per channel fixed-point 8-bit number */ 86 U16, /**< unsigned 16-bit number */ 87 S16, /**< signed 16-bit number */ 88 QSYMM16, /**< quantized, symmetric fixed-point 16-bit number */ 89 QASYMM16, /**< quantized, asymmetric fixed-point 16-bit number */ 90 U32, /**< unsigned 32-bit number */ 91 S32, /**< signed 32-bit number */ 92 U64, /**< unsigned 64-bit number */ 93 S64, /**< signed 64-bit number */ 94 BFLOAT16, /**< 16-bit brain floating-point number */ 95 F16, /**< 16-bit floating-point number */ 96 F32, /**< 32-bit floating-point number */ 97 F64, /**< 64-bit floating-point number */ 98 SIZET /**< size_t */ 99 }; 100 101 /** Available Sampling Policies */ 102 enum class SamplingPolicy 103 { 104 CENTER, /**< Samples are taken at pixel center */ 105 TOP_LEFT /**< Samples are taken at pixel top left corner */ 106 }; 107 108 /** Constant value of the border pixels when using BorderMode::CONSTANT */ 109 constexpr uint8_t CONSTANT_BORDER_VALUE = 199; 110 111 /** Constant value used to indicate a half-scale pyramid */ 112 constexpr float SCALE_PYRAMID_HALF = 0.5f; 113 114 /** Constant value used to indicate a ORB scaled pyramid */ 115 constexpr float SCALE_PYRAMID_ORB = 8.408964152537146130583778358414e-01; 116 117 /** [DataLayout enum definition] **/ 118 119 /** Supported tensor data layouts */ 120 enum class DataLayout 121 { 122 UNKNOWN, /**< Unknown data layout */ 123 NCHW, /**< Num samples, channels, height, width */ 124 NHWC /**< Num samples, height, width, channels */ 125 }; 126 /** [DataLayout enum definition] **/ 127 128 /** Supported tensor data layout dimensions */ 129 enum class DataLayoutDimension 130 { 131 CHANNEL, /**< channel */ 132 HEIGHT, 
/**< height */ 133 WIDTH, /**< width */ 134 BATCHES /**< batches */ 135 }; 136 137 /** Available ConvolutionMethod*/ 138 enum class ConvolutionMethod 139 { 140 GEMM, /**< Convolution using GEMM */ 141 GEMM_CONV2D, /**< Direct 2D GEMM convolution */ 142 DIRECT, /**< Direct convolution */ 143 WINOGRAD, /**< Convolution using Winograd */ 144 FFT /**< Convolution using FFT */ 145 }; 146 147 /** Available DepthwiseConvolutionFunction*/ 148 enum class DepthwiseConvolutionFunction 149 { 150 OPTIMIZED, /**< Optimized Depthwise Convolution */ 151 GENERIC, /**< Generic Depthwise Convolution */ 152 }; 153 154 /** Available DeconvolutionMethod*/ 155 enum class DeconvolutionMethod 156 { 157 GEMM, /**< Deconvolution using GEMM */ 158 DIRECT, /**< Direct deconvolution */ 159 }; 160 161 /** Available FuseBatchNormalizationType*/ 162 enum class FuseBatchNormalizationType 163 { 164 CONVOLUTION, /**< For Convolution weights */ 165 DEPTHWISECONVOLUTION /**< For Depthwise Convolution weights*/ 166 }; 167 168 /** Padding mode to use for PadLayer */ 169 enum class PaddingMode 170 { 171 CONSTANT, 172 REFLECT, 173 SYMMETRIC 174 }; 175 176 /** Supported comparison operations */ 177 enum class ComparisonOperation 178 { 179 Equal, /**< Equal comparison ( \f$ x == y \f$ ) */ 180 NotEqual, /**< NotEqual comparison ( \f$ x != y \f$ ) */ 181 Greater, /**< Greater comparison ( \f$ x > y \f$ ) */ 182 GreaterEqual, /**< Greater equal comparison ( \f$ x >= y \f$ ) */ 183 Less, /**< Less comparison ( \f$ x < y \f$ ) */ 184 LessEqual /**< Less equal comparison ( \f$ x <= y \f$ ) */ 185 }; 186 187 /** Container for valid region of a window */ 188 struct ValidRegion 189 { 190 /** Default constructor */ ValidRegionValidRegion191 ValidRegion() 192 : anchor{}, shape{} 193 { 194 } 195 196 /** Allow instances of this class to be copy constructed */ 197 ValidRegion(const ValidRegion &) = default; 198 /** Allow instances of this class to be move constructed */ 199 ValidRegion(ValidRegion &&) = default; 200 /** 
Allow instances of this class to be copied */ 201 ValidRegion &operator=(const ValidRegion &) = default; 202 /** Allow instances of this class to be moved */ 203 ValidRegion &operator=(ValidRegion &&) = default; 204 /** Default destructor */ 205 ~ValidRegion() = default; 206 207 /** Constructor for a valid region with default number of dimensions 208 * 209 * @param[in] an_anchor Anchor for the start of the valid region. 210 * @param[in] a_shape Shape of the valid region. 211 * 212 */ ValidRegionValidRegion213 ValidRegion(const Coordinates &an_anchor, const TensorShape &a_shape) 214 : anchor{ an_anchor }, shape{ a_shape } 215 { 216 anchor.set_num_dimensions(std::max(anchor.num_dimensions(), shape.num_dimensions())); 217 } 218 219 /** Constructor for a valid region with specified number of dimensions 220 * 221 * @param[in] an_anchor Anchor for the start of the valid region. 222 * @param[in] a_shape Shape of the valid region. 223 * @param[in] num_dimensions Number of dimensions (must be >= number of dimensions of anchor and shape). 224 * 225 */ ValidRegionValidRegion226 ValidRegion(const Coordinates &an_anchor, const TensorShape &a_shape, size_t num_dimensions) 227 : anchor{ an_anchor }, shape{ a_shape } 228 { 229 ARM_COMPUTE_ERROR_ON(num_dimensions < std::max(anchor.num_dimensions(), shape.num_dimensions())); 230 anchor.set_num_dimensions(num_dimensions); 231 } 232 233 /** Return the start of the valid region for the given dimension @p d */ startValidRegion234 int start(unsigned int d) const 235 { 236 return anchor[d]; 237 } 238 239 /** Return the end of the valid region for the given dimension @p d */ endValidRegion240 int end(unsigned int d) const 241 { 242 return anchor[d] + shape[d]; 243 } 244 245 /** Accessor to set the value of anchor and shape for one of the dimensions. 246 * 247 * @param[in] dimension Dimension for which the value is set. 248 * @param[in] start Value to be set in anchor for the dimension. 
249 * @param[in] size Value to be set in shape for the dimension. 250 * 251 * @return *this. 252 */ setValidRegion253 ValidRegion &set(size_t dimension, int start, size_t size) 254 { 255 anchor.set(dimension, start); 256 shape.set(dimension, size); 257 return *this; 258 } 259 260 Coordinates anchor; /**< Anchor for the start of the valid region. */ 261 TensorShape shape; /**< Shape of the valid region. */ 262 }; 263 264 /** Methods available to handle borders */ 265 enum class BorderMode 266 { 267 UNDEFINED, /**< Borders are left undefined */ 268 CONSTANT, /**< Pixels outside the image are assumed to have a constant value */ 269 REPLICATE /**< Pixels outside the image are assumed to have the same value as the closest image pixel */ 270 }; 271 272 /** Container for 2D border size */ 273 struct BorderSize 274 { 275 /** Empty border, i.e. no border */ BorderSizeBorderSize276 constexpr BorderSize() 277 : top{ 0 }, right{ 0 }, bottom{ 0 }, left{ 0 } 278 { 279 } 280 281 /** Border with equal size around the 2D plane */ BorderSizeBorderSize282 explicit constexpr BorderSize(unsigned int size) 283 : top{ size }, right{ size }, bottom{ size }, left{ size } 284 { 285 } 286 287 /** Border with same size for top/bottom and left/right */ BorderSizeBorderSize288 constexpr BorderSize(unsigned int top_bottom, unsigned int left_right) 289 : top{ top_bottom }, right{ left_right }, bottom{ top_bottom }, left{ left_right } 290 { 291 } 292 293 /** Border with different sizes */ BorderSizeBorderSize294 constexpr BorderSize(unsigned int top, unsigned int right, unsigned int bottom, unsigned int left) 295 : top{ top }, right{ right }, bottom{ bottom }, left{ left } 296 { 297 } 298 299 /** Check if the entire border is zero */ emptyBorderSize300 constexpr bool empty() const 301 { 302 return top == 0 && right == 0 && bottom == 0 && left == 0; 303 } 304 305 /** Check if the border is the same size on all sides */ uniformBorderSize306 constexpr bool uniform() const 307 { 308 return top == 
right && top == bottom && top == left; 309 } 310 311 /** Scale this border size. 312 * 313 * @param[in] scale Scale to multiply border size by. 314 * 315 * @return *this. 316 */ 317 BorderSize &operator*=(float scale) 318 { 319 top *= scale; 320 right *= scale; 321 bottom *= scale; 322 left *= scale; 323 324 return *this; 325 } 326 327 /** Scale a copy of this border size. 328 * 329 * @param[in] scale Scale to multiply border size by. 330 * 331 * @return a scaled copy of this. 332 */ 333 BorderSize operator*(float scale) 334 { 335 BorderSize size = *this; 336 size *= scale; 337 338 return size; 339 } 340 341 /** Check equality with another BorderSize struct 342 * 343 * @param[in] rhs other struct to check against 344 * 345 * @return true if they are equal 346 */ 347 bool operator==(const BorderSize &rhs) 348 { 349 return (top == rhs.top) && (right == rhs.right) && (bottom == rhs.bottom) && (left == rhs.left); 350 } 351 352 /** Check non-equality with another BorderSize struct 353 * 354 * @param[in] rhs other struct to check against 355 * 356 * @return true if they are different 357 */ 358 bool operator!=(const BorderSize &rhs) 359 { 360 return !(*this == rhs); 361 } 362 363 /** Limit this border size. 364 * 365 * @param[in] limit Border size to limit this border size to. 
366 */ limitBorderSize367 void limit(const BorderSize &limit) 368 { 369 top = std::min(top, limit.top); 370 right = std::min(right, limit.right); 371 bottom = std::min(bottom, limit.bottom); 372 left = std::min(left, limit.left); 373 } 374 375 unsigned int top; /**< top of the border */ 376 unsigned int right; /**< right of the border */ 377 unsigned int bottom; /**< bottom of the border */ 378 unsigned int left; /**< left of the border */ 379 }; 380 381 /** Container for 2D padding size */ 382 using PaddingSize = BorderSize; 383 384 /** Policy to handle overflow */ 385 enum class ConvertPolicy 386 { 387 WRAP, /**< Wrap around */ 388 SATURATE /**< Saturate */ 389 }; 390 391 /** Interpolation method */ 392 enum class InterpolationPolicy 393 { 394 NEAREST_NEIGHBOR, /**< Output values are defined to match the source pixel whose center is nearest to the sample position */ 395 BILINEAR, /**< Output values are defined by bilinear interpolation between the pixels */ 396 AREA, /**< Output values are determined by averaging the source pixels whose areas fall under the area of the destination pixel, projected onto the source image */ 397 }; 398 399 /** Bilinear Interpolation method used by LKTracker */ 400 enum class BilinearInterpolation 401 { 402 BILINEAR_OLD_NEW, /**< Old-new method */ 403 BILINEAR_SCHARR /**< Scharr method */ 404 }; 405 406 /** Threshold mode */ 407 enum class ThresholdType 408 { 409 BINARY, /**< Threshold with one value */ 410 RANGE /**< Threshold with two values*/ 411 }; 412 413 /** Termination criteria */ 414 enum class Termination 415 { 416 TERM_CRITERIA_EPSILON, /**< Terminate when within epsilon of a threshold */ 417 TERM_CRITERIA_ITERATIONS, /**< Terminate after a maximum number of iterations */ 418 TERM_CRITERIA_BOTH /**< Terminate on whichever of the other conditions occurs first */ 419 }; 420 421 /** Magnitude calculation type. 
*/ 422 enum class MagnitudeType 423 { 424 L1NORM, /**< L1 normalization type */ 425 L2NORM /**< L2 normalization type */ 426 }; 427 428 /** Phase calculation type. 429 * 430 * @note When PhaseType == SIGNED, each angle is mapped to the range 0 to 255 inclusive otherwise angles between 0 and 180 431 */ 432 enum class PhaseType 433 { 434 SIGNED, /**< Angle range: [0, 360] */ 435 UNSIGNED /**< Angle range: [0, 180] */ 436 }; 437 438 /** Keypoint type */ 439 struct KeyPoint 440 { 441 int32_t x{ 0 }; /**< X coordinates */ 442 int32_t y{ 0 }; /**< Y coordinates */ 443 float strength{ 0.f }; /**< Strength of the point */ 444 float scale{ 0.f }; /**< Scale initialized to 0 by the corner detector */ 445 float orientation{ 0.f }; /**< Orientation initialized to 0 by the corner detector */ 446 int32_t tracking_status{ 0 }; /**< Status initialized to 1 by the corner detector, set to 0 when the point is lost */ 447 float error{ 0.f }; /**< Tracking error initialized to 0 by the corner detector */ 448 }; 449 450 /** Internal key point */ 451 using InternalKeypoint = std::tuple<float, float, float>; /* x,y,strength */ 452 453 /** Rectangle type */ 454 struct Rectangle 455 { 456 uint16_t x; /**< Top-left x coordinate */ 457 uint16_t y; /**< Top-left y coordinate */ 458 uint16_t width; /**< Width of the rectangle */ 459 uint16_t height; /**< Height of the rectangle */ 460 }; 461 462 /** Coordinate type */ 463 struct Coordinates2D 464 { 465 int32_t x; /**< X coordinates */ 466 int32_t y; /**< Y coordinates */ 467 }; 468 469 /** Coordinate type */ 470 struct Coordinates3D 471 { 472 uint32_t x; /**< X coordinates */ 473 uint32_t y; /**< Y coordinates */ 474 uint32_t z; /**< Z coordinates */ 475 }; 476 477 /** Padding information as a pair of unsigned int start/end */ 478 using PaddingInfo = std::pair<uint32_t, uint32_t>; 479 480 /** List of padding information */ 481 using PaddingList = std::vector<PaddingInfo>; 482 483 /** Information to produce a tiled version of a Tensor */ 484 
using Multiples = std::vector<uint32_t>; 485 486 /** Available channels */ 487 enum class Channel 488 { 489 UNKNOWN, /** Unknown channel format */ 490 C0, /**< First channel (used by formats with unknown channel types). */ 491 C1, /**< Second channel (used by formats with unknown channel types). */ 492 C2, /**< Third channel (used by formats with unknown channel types). */ 493 C3, /**< Fourth channel (used by formats with unknown channel types). */ 494 R, /**< Red channel. */ 495 G, /**< Green channel. */ 496 B, /**< Blue channel. */ 497 A, /**< Alpha channel. */ 498 Y, /**< Luma channel. */ 499 U, /**< Cb/U channel. */ 500 V /**< Cr/V/Value channel. */ 501 }; 502 503 /** Available matrix patterns */ 504 enum class MatrixPattern 505 { 506 BOX, /**< Box pattern matrix. */ 507 CROSS, /**< Cross pattern matrix. */ 508 DISK, /**< Disk pattern matrix. */ 509 OTHER /**< Any other matrix pattern. */ 510 }; 511 512 /** Available non linear functions. */ 513 enum class NonLinearFilterFunction : unsigned 514 { 515 MEDIAN = 0, /**< Non linear median filter. */ 516 MIN = 1, /**< Non linear erode. */ 517 MAX = 2, /**< Non linear dilate. 
*/ 518 }; 519 520 /** Available reduction operations */ 521 enum class ReductionOperation 522 { 523 ARG_IDX_MAX, /**< Index of the max value */ 524 ARG_IDX_MIN, /**< Index of the min value */ 525 MEAN_SUM, /**< Mean of sum */ 526 PROD, /**< Product */ 527 SUM_SQUARE, /**< Sum of squares */ 528 SUM, /**< Sum */ 529 MIN, /**< Min */ 530 MAX, /**< Max */ 531 }; 532 533 /** Available element-wise operations */ 534 enum class ArithmeticOperation 535 { 536 ADD, /**< (x + y) */ 537 SUB, /**< (x - y) */ 538 DIV, /**< (x / y) */ 539 MIN, /**< Min(x, y) */ 540 MAX, /**< Max(x, y) */ 541 SQUARED_DIFF, /**< (x - y)^2 */ 542 POWER, /**< x ^ y */ 543 PRELU, /**< y*x if x < 0, x otherwise */ 544 }; 545 546 /** Available element wise unary operations */ 547 enum class ElementWiseUnary 548 { 549 RSQRT, /**< Reverse square root */ 550 EXP, /**< Exponential */ 551 NEG, /**< Negate */ 552 LOG, /**< Natural Logarithm */ 553 ABS, /**< Absolute value */ 554 SIN, /**< Sine */ 555 ROUND, /**< Round */ 556 LOGICAL_NOT, /**< Logical Not */ 557 }; 558 559 /** The normalization type used for the normalization layer */ 560 enum class NormType 561 { 562 IN_MAP_1D, /**< Normalization applied within the same map in 1D region */ 563 IN_MAP_2D, /**< Normalization applied within the same map in 2D region */ 564 CROSS_MAP /**< Normalization applied cross maps */ 565 }; 566 567 /** Normalization type for Histogram of Oriented Gradients (HOG) */ 568 enum class HOGNormType 569 { 570 L2_NORM = 1, /**< L2-norm */ 571 L2HYS_NORM = 2, /**< L2-norm followed by clipping */ 572 L1_NORM = 3 /**< L1 norm */ 573 }; 574 575 /** Detection window used for the object detection. 
The detection window keeps the following information: 576 * 577 * -# Geometry of the rectangular window (x/y of top-left corner and width/height) 578 * -# Index of the class used for evaluating which class the detection window belongs to 579 * -# Confidence value (score) obtained with the classifier 580 */ 581 struct DetectionWindow 582 { 583 uint16_t x{ 0 }; /**< Top-left x coordinate */ 584 uint16_t y{ 0 }; /**< Top-left y coordinate */ 585 uint16_t width{ 0 }; /**< Width of the detection window */ 586 uint16_t height{ 0 }; /**< Height of the detection window */ 587 uint16_t idx_class{ 0 }; /**< Index of the class */ 588 float score{ 0.f }; /**< Confidence value for the detection window */ 589 }; 590 591 /** Dimension rounding type when down-scaling on CNNs 592 * @note Used in pooling and convolution layer 593 */ 594 enum class DimensionRoundingType 595 { 596 FLOOR, /**< Floor rounding */ 597 CEIL /**< Ceil rounding */ 598 }; 599 600 /** Available pooling types */ 601 enum class PoolingType 602 { 603 MAX, /**< Max Pooling */ 604 AVG, /**< Average Pooling */ 605 L2 /**< L2 Pooling */ 606 }; 607 608 /** Available non maxima suppression types */ 609 enum class NMSType 610 { 611 LINEAR, /**< Linear NMS */ 612 GAUSSIAN, /**< Gaussian NMS */ 613 ORIGINAL /**< Original NMS */ 614 }; 615 616 /** BoxWithNonMaximaSuppressionLimit Information class */ 617 class BoxNMSLimitInfo final 618 { 619 public: 620 /** Constructor 621 * 622 * @param[in] score_thresh (Optional) Score threshold. 623 * @param[in] nms (Optional) NMS value 624 * @param[in] detections (Optional) Number of detections 625 * @param[in] soft_nms_enabled (Optional) Enable SoftNMS 626 * @param[in] soft_nms_method (Optional) Soft NMS method 627 * @param[in] soft_nms_sigma (Optional) Soft NMS sigma value 628 * @param[in] soft_nms_min_score_thres (Optional) Soft NMS minimum score threshold 629 * @param[in] suppress_size (Optional) Filter out boxes based on their size. 
Defaults to false 630 * @param[in] min_size (Optional) Smaller boxes than min_size will be filtered out. Defaults to 1 631 * @param[in] im_width (Optional) Boxes whose centers (on the x axis) is beyond im_width will be filtered. Defaults to 1 632 * @param[in] im_height (Optional) Boxes whose centers (on the y axis) is beyond im_height will be filtered. Defaults to 1 633 */ 634 BoxNMSLimitInfo(float score_thresh = 0.05f, float nms = 0.3f, 635 int detections = 100, bool soft_nms_enabled = false, 636 NMSType soft_nms_method = NMSType::LINEAR, 637 float soft_nms_sigma = 0.5f, float soft_nms_min_score_thres = 0.001f, bool suppress_size = false, float min_size = 1.0f, float im_width = 1.0f, float im_height = 1.0f) _score_thresh(score_thresh)638 : _score_thresh(score_thresh), _nms(nms), _detections_per_im(detections), _soft_nms_enabled(soft_nms_enabled), _soft_nms_method(soft_nms_method), _soft_nms_sigma(soft_nms_sigma), 639 _soft_nms_min_score_thres(soft_nms_min_score_thres), _suppress_size(suppress_size), _min_size(min_size), _im_width(im_width), _im_height(im_height) 640 { 641 } 642 /** Get the score threshold */ score_thresh()643 float score_thresh() const 644 { 645 return _score_thresh; 646 } 647 /** Get the NMS */ nms()648 float nms() const 649 { 650 return _nms; 651 } 652 /** Get the number of detections */ detections_per_im()653 int detections_per_im() const 654 { 655 return _detections_per_im; 656 } 657 /** Check if soft NMS is enabled */ soft_nms_enabled()658 bool soft_nms_enabled() const 659 { 660 return _soft_nms_enabled; 661 } 662 /** Get soft NMS method */ soft_nms_method()663 NMSType soft_nms_method() const 664 { 665 return _soft_nms_method; 666 } 667 /** Get soft NMS sigma */ soft_nms_sigma()668 float soft_nms_sigma() const 669 { 670 return _soft_nms_sigma; 671 } 672 /** Get soft nms min score threshold */ soft_nms_min_score_thres()673 float soft_nms_min_score_thres() const 674 { 675 return _soft_nms_min_score_thres; 676 } 677 /** Get if NMS will suppress 
boxes based on their size/position */ suppress_size()678 bool suppress_size() const 679 { 680 return _suppress_size; 681 } 682 /** Get size suppression threshold */ min_size()683 float min_size() const 684 { 685 return _min_size; 686 } 687 /** Get image width (NMS may suppress boxes whose center sits beyond the image width) */ im_width()688 float im_width() const 689 { 690 return _im_width; 691 } 692 /** Get image height (NMS may suppress boxes whose center sits beyond the image height) */ im_height()693 float im_height() const 694 { 695 return _im_height; 696 } 697 698 private: 699 float _score_thresh; 700 float _nms; 701 int _detections_per_im; 702 bool _soft_nms_enabled; 703 NMSType _soft_nms_method; 704 float _soft_nms_sigma; 705 float _soft_nms_min_score_thres; 706 bool _suppress_size; 707 float _min_size; 708 float _im_width; 709 float _im_height; 710 }; 711 712 /** Padding and stride information class */ 713 class PadStrideInfo 714 { 715 public: 716 /** Constructor 717 * 718 * @param[in] stride_x (Optional) Stride, in elements, across x. Defaults to 1. 719 * @param[in] stride_y (Optional) Stride, in elements, across y. Defaults to 1. 720 * @param[in] pad_x (Optional) Padding, in elements, across x. Defaults to 0. 721 * @param[in] pad_y (Optional) Padding, in elements, across y. Defaults to 0. 722 * @param[in] round (Optional) Dimensions rounding. Defaults to @ref FLOOR. 723 */ 724 PadStrideInfo(unsigned int stride_x = 1, unsigned int stride_y = 1, 725 unsigned int pad_x = 0, unsigned int pad_y = 0, 726 DimensionRoundingType round = DimensionRoundingType::FLOOR) _stride(std::make_pair (stride_x,stride_y))727 : _stride(std::make_pair(stride_x, stride_y)), 728 _pad_left(pad_x), 729 _pad_top(pad_y), 730 _pad_right(pad_x), 731 _pad_bottom(pad_y), 732 _round_type(round) 733 { 734 } 735 /** Constructor 736 * 737 * @param[in] stride_x Stride, in elements, across x. 738 * @param[in] stride_y Stride, in elements, across y. 
739 * @param[in] pad_left Padding across x on the left, in elements. 740 * @param[in] pad_top Padding across y on the top, in elements. 741 * @param[in] pad_right Padding across x on the right, in elements. 742 * @param[in] pad_bottom Padding across y on the bottom, in elements. 743 * @param[in] round Dimensions rounding. 744 */ PadStrideInfo(unsigned int stride_x,unsigned int stride_y,unsigned int pad_left,unsigned int pad_right,unsigned int pad_top,unsigned int pad_bottom,DimensionRoundingType round)745 PadStrideInfo(unsigned int stride_x, unsigned int stride_y, 746 unsigned int pad_left, unsigned int pad_right, 747 unsigned int pad_top, unsigned int pad_bottom, 748 DimensionRoundingType round) 749 : _stride(std::make_pair(stride_x, stride_y)), 750 _pad_left(pad_left), 751 _pad_top(pad_top), 752 _pad_right(pad_right), 753 _pad_bottom(pad_bottom), 754 _round_type(round) 755 { 756 } 757 /** Get the stride. 758 * 759 * @return a pair: stride x, stride y. 760 */ stride()761 std::pair<unsigned int, unsigned int> stride() const 762 { 763 return _stride; 764 } 765 /** Check whether the padding is symmetric. 766 * 767 * @return True if the padding is symmetric. 768 */ padding_is_symmetric()769 bool padding_is_symmetric() const 770 { 771 return (_pad_left == _pad_right) && (_pad_top == _pad_bottom); 772 } 773 /** Get the padding. 774 * 775 * @note This should only be used when the padding is symmetric. 
776 * 777 * @return a pair: padding left/right, padding top/bottom 778 */ pad()779 std::pair<unsigned int, unsigned int> pad() const 780 { 781 //this accessor should be used only when padding is symmetric 782 ARM_COMPUTE_ERROR_ON(!padding_is_symmetric()); 783 return std::make_pair(_pad_left, _pad_top); 784 } 785 786 /** Get the left padding */ pad_left()787 unsigned int pad_left() const 788 { 789 return _pad_left; 790 } 791 /** Get the right padding */ pad_right()792 unsigned int pad_right() const 793 { 794 return _pad_right; 795 } 796 /** Get the top padding */ pad_top()797 unsigned int pad_top() const 798 { 799 return _pad_top; 800 } 801 /** Get the bottom padding */ pad_bottom()802 unsigned int pad_bottom() const 803 { 804 return _pad_bottom; 805 } 806 807 /** Get the rounding type */ round()808 DimensionRoundingType round() const 809 { 810 return _round_type; 811 } 812 813 /** Check whether this has any padding */ has_padding()814 bool has_padding() const 815 { 816 return (_pad_left != 0 || _pad_top != 0 || _pad_right != 0 || _pad_bottom != 0); 817 } 818 819 private: 820 std::pair<unsigned int, unsigned int> _stride; 821 unsigned int _pad_left; 822 unsigned int _pad_top; 823 unsigned int _pad_right; 824 unsigned int _pad_bottom; 825 826 DimensionRoundingType _round_type; 827 }; 828 829 /** PriorBox layer info */ 830 class PriorBoxLayerInfo final 831 { 832 public: 833 /** Default Constructor */ PriorBoxLayerInfo()834 PriorBoxLayerInfo() 835 : _min_sizes(), 836 _variances(), 837 _offset(), 838 _flip(true), 839 _clip(false), 840 _max_sizes(), 841 _aspect_ratios(), 842 _img_size(), 843 _steps() 844 { 845 } 846 /** Constructor 847 * 848 * @param[in] min_sizes Min sizes vector. 849 * @param[in] variances Variances vector. 850 * @param[in] offset Offset value. 851 * @param[in] flip (Optional) Flip the aspect ratios. 852 * @param[in] clip (Optional) Clip coordinates so that they're within [0,1]. 853 * @param[in] max_sizes (Optional) Max sizes vector. 
854 * @param[in] aspect_ratios (Optional) Aspect ratios of the boxes. 855 * @param[in] img_size (Optional) Image size. 856 * @param[in] steps (Optional) Step values. 857 */ 858 PriorBoxLayerInfo(const std::vector<float> &min_sizes, const std::vector<float> &variances, float offset, bool flip = true, bool clip = false, 859 const std::vector<float> &max_sizes = {}, const std::vector<float> &aspect_ratios = {}, 860 const Coordinates2D &img_size = Coordinates2D{ 0, 0 }, const std::array<float, 2> &steps = { { 0.f, 0.f } }) _min_sizes(min_sizes)861 : _min_sizes(min_sizes), 862 _variances(variances), 863 _offset(offset), 864 _flip(flip), 865 _clip(clip), 866 _max_sizes(max_sizes), 867 _aspect_ratios(), 868 _img_size(img_size), 869 _steps(steps) 870 { 871 _aspect_ratios.push_back(1.); 872 for(unsigned int i = 0; i < aspect_ratios.size(); ++i) 873 { 874 float ar = aspect_ratios[i]; 875 bool already_exist = false; 876 for(auto ar_new : _aspect_ratios) 877 { 878 if(fabs(ar - ar_new) < 1e-6) 879 { 880 already_exist = true; 881 break; 882 } 883 } 884 if(!already_exist) 885 { 886 _aspect_ratios.push_back(ar); 887 if(flip) 888 { 889 _aspect_ratios.push_back(1.f / ar); 890 } 891 } 892 } 893 } 894 /** Get min sizes. */ min_sizes()895 std::vector<float> min_sizes() const 896 { 897 return _min_sizes; 898 } 899 /** Get min variances. */ variances()900 std::vector<float> variances() const 901 { 902 return _variances; 903 } 904 /** Get the step coordinates */ steps()905 std::array<float, 2> steps() const 906 { 907 return _steps; 908 } 909 /** Get the image size coordinates */ img_size()910 Coordinates2D img_size() const 911 { 912 return _img_size; 913 } 914 /** Get the offset */ offset()915 float offset() const 916 { 917 return _offset; 918 } 919 /** Get the flip value */ flip()920 bool flip() const 921 { 922 return _flip; 923 } 924 /** Get the clip value */ clip()925 bool clip() const 926 { 927 return _clip; 928 } 929 /** Get max sizes. 
*/ max_sizes()930 std::vector<float> max_sizes() const 931 { 932 return _max_sizes; 933 } 934 /** Get aspect ratios. */ aspect_ratios()935 std::vector<float> aspect_ratios() const 936 { 937 return _aspect_ratios; 938 } 939 940 private: 941 std::vector<float> _min_sizes; 942 std::vector<float> _variances; 943 float _offset; 944 bool _flip; 945 bool _clip; 946 std::vector<float> _max_sizes; 947 std::vector<float> _aspect_ratios; 948 Coordinates2D _img_size; 949 std::array<float, 2> _steps; 950 }; 951 952 // Bounding Box [xmin, ymin, xmax, ymax] 953 using BBox = std::array<float, 4>; 954 // LabelBBox used for map label and bounding box 955 using LabelBBox = std::map<int, std::vector<BBox>>; 956 957 /** Available Detection Output code types */ 958 enum class DetectionOutputLayerCodeType 959 { 960 CORNER, /**< Use box corners */ 961 CENTER_SIZE, /**< Use box centers and size */ 962 CORNER_SIZE, /**< Use box centers and size */ 963 TF_CENTER /**< Use box centers and size but flip x and y co-ordinates */ 964 }; 965 966 /** Detection Output layer info */ 967 class DetectionOutputLayerInfo final 968 { 969 public: 970 /** Default Constructor */ DetectionOutputLayerInfo()971 DetectionOutputLayerInfo() 972 : _num_classes(), 973 _share_location(), 974 _code_type(DetectionOutputLayerCodeType::CORNER), 975 _keep_top_k(), 976 _nms_threshold(), 977 _top_k(), 978 _background_label_id(), 979 _confidence_threshold(), 980 _variance_encoded_in_target(false), 981 _eta(), 982 _num_loc_classes() 983 { 984 _num_loc_classes = _share_location ? 1 : _num_classes; 985 } 986 /** Constructor 987 * 988 * @param[in] num_classes Number of classes to be predicted. 989 * @param[in] share_location If true, bounding box are shared among different classes. 990 * @param[in] code_type Type of coding method for bbox. 991 * @param[in] keep_top_k Number of total bounding boxes to be kept per image after NMS step. 992 * @param[in] nms_threshold Threshold to be used in NMS. 
993 * @param[in] top_k (Optional) Number of boxes per image with top confidence scores that are fed into the NMS algorithm. Default set to -1. 994 * @param[in] background_label_id (Optional) Background label ID. If there is no background class, set it as -1. 995 * @param[in] confidence_threshold (Optional) Only consider detections whose confidences are larger than a threshold. Default set to -FLT_MAX. 996 * @param[in] variance_encoded_in_target (Optional) If true, variance is encoded in target. Otherwise we need to adjust the predicted offset accordingly.Default set to false. 997 * @param[in] eta (Optional) Eta. 998 */ 999 DetectionOutputLayerInfo(int num_classes, bool share_location, DetectionOutputLayerCodeType code_type, int keep_top_k, float nms_threshold, int top_k = -1, int background_label_id = -1, 1000 float confidence_threshold = std::numeric_limits<float>::lowest(), bool variance_encoded_in_target = false, float eta = 1) _num_classes(num_classes)1001 : _num_classes(num_classes), 1002 _share_location(share_location), 1003 _code_type(code_type), 1004 _keep_top_k(keep_top_k), 1005 _nms_threshold(nms_threshold), 1006 _top_k(top_k), 1007 _background_label_id(background_label_id), 1008 _confidence_threshold(confidence_threshold), 1009 _variance_encoded_in_target(variance_encoded_in_target), 1010 _eta(eta), 1011 _num_loc_classes() 1012 { 1013 _num_loc_classes = _share_location ? 1 : _num_classes; 1014 } 1015 /** Get num classes. */ num_classes()1016 int num_classes() const 1017 { 1018 return _num_classes; 1019 } 1020 /** Get share location. */ share_location()1021 bool share_location() const 1022 { 1023 return _share_location; 1024 } 1025 /** Get detection output code type. */ code_type()1026 DetectionOutputLayerCodeType code_type() const 1027 { 1028 return _code_type; 1029 } 1030 /** Get if variance encoded in target. 
*/ variance_encoded_in_target()1031 bool variance_encoded_in_target() const 1032 { 1033 return _variance_encoded_in_target; 1034 } 1035 /** Get the number of total bounding boxes to be kept per image. */ keep_top_k()1036 int keep_top_k() const 1037 { 1038 return _keep_top_k; 1039 } 1040 /** Get nms threshold. */ nms_threshold()1041 float nms_threshold() const 1042 { 1043 return _nms_threshold; 1044 } 1045 /** Get eta. */ eta()1046 float eta() const 1047 { 1048 return _eta; 1049 } 1050 /** Get background label ID. */ background_label_id()1051 int background_label_id() const 1052 { 1053 return _background_label_id; 1054 } 1055 /** Get confidence threshold. */ confidence_threshold()1056 float confidence_threshold() const 1057 { 1058 return _confidence_threshold; 1059 } 1060 /** Get top K. */ top_k()1061 int top_k() const 1062 { 1063 return _top_k; 1064 } 1065 /** Get number of location classes. */ num_loc_classes()1066 int num_loc_classes() const 1067 { 1068 return _num_loc_classes; 1069 } 1070 1071 private: 1072 int _num_classes; 1073 bool _share_location; 1074 DetectionOutputLayerCodeType _code_type; 1075 int _keep_top_k; 1076 float _nms_threshold; 1077 int _top_k; 1078 int _background_label_id; 1079 float _confidence_threshold; 1080 bool _variance_encoded_in_target; 1081 float _eta; 1082 int _num_loc_classes; 1083 }; 1084 1085 /** Detection Output layer info */ 1086 class DetectionPostProcessLayerInfo final 1087 { 1088 public: 1089 /** Default Constructor */ DetectionPostProcessLayerInfo()1090 DetectionPostProcessLayerInfo() 1091 : _max_detections(), 1092 _max_classes_per_detection(), 1093 _nms_score_threshold(), 1094 _iou_threshold(), 1095 _num_classes(), 1096 _scales_values(), 1097 _use_regular_nms(), 1098 _detection_per_class(), 1099 _dequantize_scores() 1100 { 1101 } 1102 /** Constructor 1103 * 1104 * @param[in] max_detections Number of total detection. 1105 * @param[in] max_classes_per_detection Number of total classes to be kept after NMS step. 
Used in the Fast Non-Max-Suppression 1106 * @param[in] nms_score_threshold Threshold to be used in NMS 1107 * @param[in] iou_threshold Threshold to be used during the intersection over union. 1108 * @param[in] num_classes Number of classes. 1109 * @param[in] scales_values Scales values used for decode center size boxes. 1110 * @param[in] use_regular_nms (Optional) Boolean to determinate if use regular or fast nms. Defaults to false. 1111 * @param[in] detection_per_class (Optional) Number of detection per class. Used in the Regular Non-Max-Suppression. Defaults to 100. 1112 * @param[in] dequantize_scores (Optional) If the scores need to be dequantized. Defaults to true. 1113 */ 1114 DetectionPostProcessLayerInfo(unsigned int max_detections, unsigned int max_classes_per_detection, float nms_score_threshold, float iou_threshold, unsigned int num_classes, 1115 std::array<float, 4> scales_values, bool use_regular_nms = false, unsigned int detection_per_class = 100, bool dequantize_scores = true) _max_detections(max_detections)1116 : _max_detections(max_detections), 1117 _max_classes_per_detection(max_classes_per_detection), 1118 _nms_score_threshold(nms_score_threshold), 1119 _iou_threshold(iou_threshold), 1120 _num_classes(num_classes), 1121 _scales_values(scales_values), 1122 _use_regular_nms(use_regular_nms), 1123 _detection_per_class(detection_per_class), 1124 _dequantize_scores(dequantize_scores) 1125 { 1126 } 1127 /** Get max detections. */ max_detections()1128 unsigned int max_detections() const 1129 { 1130 return _max_detections; 1131 } 1132 /** Get max_classes per detection. Used in the Fast Non-Max-Suppression.*/ max_classes_per_detection()1133 unsigned int max_classes_per_detection() const 1134 { 1135 return _max_classes_per_detection; 1136 } 1137 /** Get detection per class. Used in the Regular Non-Max-Suppression */ detection_per_class()1138 unsigned int detection_per_class() const 1139 { 1140 return _detection_per_class; 1141 } 1142 /** Get nms threshold. 
*/ nms_score_threshold()1143 float nms_score_threshold() const 1144 { 1145 return _nms_score_threshold; 1146 } 1147 /** Get intersection over union threshold. */ iou_threshold()1148 float iou_threshold() const 1149 { 1150 return _iou_threshold; 1151 } 1152 /** Get num classes. */ num_classes()1153 unsigned int num_classes() const 1154 { 1155 return _num_classes; 1156 } 1157 /** Get if use regular nms. */ use_regular_nms()1158 bool use_regular_nms() const 1159 { 1160 return _use_regular_nms; 1161 } 1162 /** Get y scale value. */ scale_value_y()1163 float scale_value_y() const 1164 { 1165 // Saved as [y,x,h,w] 1166 return _scales_values[0]; 1167 } 1168 /** Get x scale value. */ scale_value_x()1169 float scale_value_x() const 1170 { 1171 // Saved as [y,x,h,w] 1172 return _scales_values[1]; 1173 } 1174 /** Get h scale value. */ scale_value_h()1175 float scale_value_h() const 1176 { 1177 // Saved as [y,x,h,w] 1178 return _scales_values[2]; 1179 } 1180 /** Get w scale value. */ scale_value_w()1181 float scale_value_w() const 1182 { 1183 // Saved as [y,x,h,w] 1184 return _scales_values[3]; 1185 } 1186 /** Get dequantize_scores value. 
*/ dequantize_scores()1187 bool dequantize_scores() const 1188 { 1189 return _dequantize_scores; 1190 } 1191 1192 private: 1193 unsigned int _max_detections; 1194 unsigned int _max_classes_per_detection; 1195 float _nms_score_threshold; 1196 float _iou_threshold; 1197 unsigned int _num_classes; 1198 std::array<float, 4> _scales_values; 1199 bool _use_regular_nms; 1200 unsigned int _detection_per_class; 1201 bool _dequantize_scores; 1202 }; 1203 1204 /** Pooling Layer Information struct*/ 1205 struct PoolingLayerInfo 1206 { 1207 /** Default Constructor */ PoolingLayerInfoPoolingLayerInfo1208 PoolingLayerInfo() 1209 : pool_type(PoolingType::MAX), 1210 pool_size(Size2D()), 1211 data_layout(DataLayout::UNKNOWN), 1212 pad_stride_info(PadStrideInfo()), 1213 exclude_padding(false), 1214 is_global_pooling(false), 1215 fp_mixed_precision(false) 1216 { 1217 } 1218 /** Constructor 1219 * 1220 * @param[in] pool_type Pooling type @ref PoolingType. 1221 * @param[in] pool_size Pooling size, in elements, across x and y. 1222 * @param[in] data_layout Data layout used by the layer @ref DataLayout 1223 * @param[in] pad_stride_info (Optional) Padding and stride information @ref PadStrideInfo 1224 * @param[in] exclude_padding (Optional) Strategy when accounting padding in calculations. 1225 * True will exclude padding while false will not (Used in AVG/L2 pooling to determine the pooling area). 1226 * Defaults to false; 1227 * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy. 
1228 */ 1229 explicit PoolingLayerInfo(PoolingType pool_type, 1230 unsigned int pool_size, 1231 DataLayout data_layout, 1232 PadStrideInfo pad_stride_info = PadStrideInfo(), 1233 bool exclude_padding = false, 1234 bool fp_mixed_precision = false) pool_typePoolingLayerInfo1235 : pool_type(pool_type), 1236 pool_size(Size2D(pool_size, pool_size)), 1237 data_layout(data_layout), 1238 pad_stride_info(pad_stride_info), 1239 exclude_padding(exclude_padding), 1240 is_global_pooling(false), 1241 fp_mixed_precision(fp_mixed_precision) 1242 { 1243 } 1244 1245 /** Constructor 1246 * 1247 * @param[in] pool_type Pooling type @ref PoolingType. 1248 * @param[in] pool_size Pooling size, in elements, across x and y. 1249 * @param[in] data_layout Data layout used by the layer @ref DataLayout 1250 * @param[in] pad_stride_info (Optional) Padding and stride information @ref PadStrideInfo 1251 * @param[in] exclude_padding (Optional) Strategy when accounting padding in calculations. 1252 * True will exclude padding while false will not (Used in AVG/L2 pooling to determine the pooling area). 1253 * Defaults to false; 1254 * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy. 1255 */ 1256 explicit PoolingLayerInfo(PoolingType pool_type, 1257 Size2D pool_size, 1258 DataLayout data_layout, 1259 PadStrideInfo pad_stride_info = PadStrideInfo(), 1260 bool exclude_padding = false, 1261 bool fp_mixed_precision = false) pool_typePoolingLayerInfo1262 : pool_type(pool_type), 1263 pool_size(pool_size), 1264 data_layout(data_layout), 1265 pad_stride_info(pad_stride_info), 1266 exclude_padding(exclude_padding), 1267 is_global_pooling(false), 1268 fp_mixed_precision(fp_mixed_precision) 1269 { 1270 } 1271 1272 /** Constructor 1273 * 1274 * @note This constructor is used for global pooling 1275 * 1276 * @param[in] pool_type Pooling type @ref PoolingType. 
1277 * @param[in] data_layout Data layout used by the layer @ref DataLayout 1278 */ PoolingLayerInfoPoolingLayerInfo1279 explicit PoolingLayerInfo(PoolingType pool_type, DataLayout data_layout) 1280 : pool_type(pool_type), 1281 pool_size(Size2D()), 1282 data_layout(data_layout), 1283 pad_stride_info(PadStrideInfo(1, 1, 0, 0)), 1284 exclude_padding(false), 1285 is_global_pooling(true), 1286 fp_mixed_precision(false) 1287 { 1288 } 1289 1290 PoolingType pool_type; 1291 Size2D pool_size; 1292 DataLayout data_layout; 1293 PadStrideInfo pad_stride_info; 1294 bool exclude_padding; 1295 bool is_global_pooling; 1296 bool fp_mixed_precision; 1297 }; 1298 1299 /** ROI Pooling Layer Information class */ 1300 class ROIPoolingLayerInfo final 1301 { 1302 public: 1303 /** Constructor 1304 * 1305 * @param[in] pooled_width Pooled width of the layer. 1306 * @param[in] pooled_height Pooled height of the layer. 1307 * @param[in] spatial_scale Spatial scale to be applied to the ROI coordinates and dimensions. 
1308 * @param[in] sampling_ratio Number of samples to include in each pooling region (if set to zero, a ceil(roi_dims/pooling_dims)) 1309 */ 1310 ROIPoolingLayerInfo(unsigned int pooled_width, unsigned int pooled_height, float spatial_scale, unsigned int sampling_ratio = 0) _pooled_width(pooled_width)1311 : _pooled_width(pooled_width), _pooled_height(pooled_height), _spatial_scale(spatial_scale), _sampling_ratio(sampling_ratio) 1312 { 1313 } 1314 /** Get the pooled width of the layer */ pooled_width()1315 unsigned int pooled_width() const 1316 { 1317 return _pooled_width; 1318 } 1319 /** Get the pooled height of the layer */ pooled_height()1320 unsigned int pooled_height() const 1321 { 1322 return _pooled_height; 1323 } 1324 /** Get the spatial scale */ spatial_scale()1325 float spatial_scale() const 1326 { 1327 return _spatial_scale; 1328 } 1329 /** Get sampling ratio */ sampling_ratio()1330 unsigned int sampling_ratio() const 1331 { 1332 return _sampling_ratio; 1333 } 1334 1335 private: 1336 unsigned int _pooled_width; 1337 unsigned int _pooled_height; 1338 float _spatial_scale; 1339 unsigned int _sampling_ratio; 1340 }; 1341 1342 /** Generate Proposals Information class */ 1343 class GenerateProposalsInfo 1344 { 1345 public: 1346 /** Constructor 1347 * 1348 * @param[in] im_width Width of the original image 1349 * @param[in] im_height Height of the original image 1350 * @param[in] im_scale Scale applied to the original image 1351 * @param[in] spatial_scale (Optional)Scale applied to the feature map. Defaults to 1.0 1352 * @param[in] pre_nms_topN (Optional)Number of the best scores to be selected from the transformations. Defaults to 6000. 1353 * @param[in] post_nms_topN (Optional)Number of the best scores to be selected from the NMS operation. Defaults to 300. 1354 * @param[in] nms_thres (Optional)NMS overlap threshold. Defaults to 0.7. 1355 * @param[in] min_size (Optional)Size used to validate the anchors produced. Defaults to 16. 
1356 * @param[in] values_per_roi (Optional)Values used to represent a ROI(Region of interest). Defaults to 4. 1357 */ 1358 GenerateProposalsInfo(float im_width, float im_height, float im_scale, float spatial_scale = 1.0, int pre_nms_topN = 6000, int post_nms_topN = 300, float nms_thres = 0.7, float min_size = 16.0, 1359 size_t values_per_roi = 4) _im_height(im_height)1360 : _im_height(im_height), _im_width(im_width), _im_scale(im_scale), _spatial_scale(spatial_scale), _pre_nms_topN(pre_nms_topN), _post_nms_topN(post_nms_topN), _nms_thres(nms_thres), 1361 _min_size(min_size), _values_per_roi(values_per_roi) 1362 { 1363 } 1364 1365 /* Get the original height */ im_height()1366 float im_height() const 1367 { 1368 return _im_height; 1369 } 1370 /* Get the original width */ im_width()1371 float im_width() const 1372 { 1373 return _im_width; 1374 } 1375 /* Get the image scale */ im_scale()1376 float im_scale() const 1377 { 1378 return _im_scale; 1379 } 1380 /* Get the value of how many best scores to select (before NMS) */ pre_nms_topN()1381 int pre_nms_topN() const 1382 { 1383 return _pre_nms_topN; 1384 } 1385 /* Get the value of how many best scores to select (after NMS) */ post_nms_topN()1386 int post_nms_topN() const 1387 { 1388 return _post_nms_topN; 1389 } 1390 /* Get the NMS overlap threshold */ nms_thres()1391 float nms_thres() const 1392 { 1393 return _nms_thres; 1394 } 1395 /* Get the minimal size */ min_size()1396 float min_size() const 1397 { 1398 return _min_size; 1399 } 1400 /* Get the spatial scale to be applied to the feature maps */ spatial_scale()1401 float spatial_scale() const 1402 { 1403 return _spatial_scale; 1404 } 1405 /* Get the values used to represent a ROI(Region of interest)*/ values_per_roi()1406 size_t values_per_roi() const 1407 { 1408 return _values_per_roi; 1409 } 1410 1411 private: 1412 float _im_height; 1413 float _im_width; 1414 float _im_scale; 1415 float _spatial_scale; 1416 int _pre_nms_topN; 1417 int _post_nms_topN; 1418 float 
_nms_thres; 1419 float _min_size; 1420 size_t _values_per_roi; 1421 }; 1422 1423 /** ComputeAnchors information class */ 1424 class ComputeAnchorsInfo 1425 { 1426 public: 1427 /** Constructor 1428 * 1429 * @param[in] feat_width Feature map width 1430 * @param[in] feat_height Feature map height 1431 * @param[in] spatial_scale Feature map scale 1432 * @param[in] values_per_roi (Optional)Values used to represent a ROI(Region Of Interest). Defaults to 4 1433 */ 1434 ComputeAnchorsInfo(float feat_width, float feat_height, float spatial_scale, size_t values_per_roi = 4) _feat_height(feat_height)1435 : _feat_height(feat_height), 1436 _feat_width(feat_width), 1437 _spatial_scale(spatial_scale), 1438 _values_per_roi(values_per_roi) 1439 { 1440 } 1441 1442 /* Get the height of the feature map */ feat_height()1443 float feat_height() const 1444 { 1445 return _feat_height; 1446 } 1447 1448 /* Get the width of the feature map */ feat_width()1449 float feat_width() const 1450 { 1451 return _feat_width; 1452 } 1453 1454 /* Get the scale of the feature map */ spatial_scale()1455 float spatial_scale() const 1456 { 1457 return _spatial_scale; 1458 } 1459 1460 /* Get the values used to represent a ROI(Region Of Interest)*/ values_per_roi()1461 size_t values_per_roi() const 1462 { 1463 return _values_per_roi; 1464 } 1465 1466 private: 1467 float _feat_height; 1468 float _feat_width; 1469 float _spatial_scale; 1470 size_t _values_per_roi; 1471 }; 1472 1473 /** Bounding Box Transform information class */ 1474 class BoundingBoxTransformInfo final 1475 { 1476 public: 1477 /** Constructor 1478 * 1479 * @param[in] img_width Width of the original image 1480 * @param[in] img_height Height, of the original image 1481 * @param[in] scale Scale of the original image 1482 * @param[in] apply_scale (Optional)Re-apply scaling after transforming the boxes. Defaults to false 1483 * @param[in] weights (Optional)Weights [wx, wy, ww, wh] for the deltas. 
Defaults to all ones 1484 * @param[in] correct_transform_coords (Optional)Correct bounding box transform coordinates. Defaults to false 1485 * @param[in] bbox_xform_clip (Optional)Minimum bounding box width and height after bounding box transformation in log-space. Defaults to log(1000/16) 1486 */ 1487 BoundingBoxTransformInfo(float img_width, float img_height, float scale, bool apply_scale = false, const std::array<float, 4> weights = { { 1.f, 1.f, 1.f, 1.f } }, bool correct_transform_coords = 1488 false, 1489 float bbox_xform_clip = 1490 4.135166556742356f) _img_width(img_width)1491 : _img_width(img_width), _img_height(img_height), _scale(scale), _apply_scale(apply_scale), _correct_transform_coords(correct_transform_coords), _weights(weights), _bbox_xform_clip(bbox_xform_clip) 1492 { 1493 } 1494 weights()1495 std::array<float, 4> weights() const 1496 { 1497 return _weights; 1498 } 1499 bbox_xform_clip()1500 float bbox_xform_clip() const 1501 { 1502 return _bbox_xform_clip; 1503 } 1504 img_height()1505 float img_height() const 1506 { 1507 return _img_height; 1508 } 1509 img_width()1510 float img_width() const 1511 { 1512 return _img_width; 1513 } 1514 scale()1515 float scale() const 1516 { 1517 return _scale; 1518 } 1519 apply_scale()1520 bool apply_scale() const 1521 { 1522 return _apply_scale; 1523 } 1524 correct_transform_coords()1525 bool correct_transform_coords() const 1526 { 1527 return _correct_transform_coords; 1528 } 1529 1530 private: 1531 float _img_width; 1532 float _img_height; 1533 float _scale; 1534 bool _apply_scale; 1535 bool _correct_transform_coords; 1536 std::array<float, 4> _weights; 1537 float _bbox_xform_clip; 1538 }; 1539 1540 /** Activation Layer Information class */ 1541 class ActivationLayerInfo 1542 { 1543 public: 1544 /** Available activation functions */ 1545 enum class ActivationFunction 1546 { 1547 LOGISTIC, /**< Logistic ( \f$ f(x) = \frac{1}{1 + e^{-x}} \f$ ) */ 1548 TANH, /**< Hyperbolic tangent ( \f$ f(x) = a \cdot tanh(b \cdot 
x) \f$ ) */ 1549 RELU, /**< Rectifier ( \f$ f(x) = max(0,x) \f$ ) */ 1550 BOUNDED_RELU, /**< Upper Bounded Rectifier ( \f$ f(x) = min(a, max(0,x)) \f$ ) */ 1551 LU_BOUNDED_RELU, /**< Lower and Upper Bounded Rectifier ( \f$ f(x) = min(a, max(b,x)) \f$ ) */ 1552 LEAKY_RELU, /**< Leaky Rectifier ( \f$ f(x) = \begin{cases} \alpha x & \quad \text{if } x \text{ < 0}\\ x & \quad \text{if } x \geq \text{ 0 } \end{cases} \f$ ) */ 1553 SOFT_RELU, /**< Soft Rectifier ( \f$ f(x)= log(1+e^x) \f$ ) */ 1554 ELU, /**< Exponential Linear Unit ( \f$ f(x) = \begin{cases} \alpha (exp(x) - 1) & \quad \text{if } x \text{ < 0}\\ x & \quad \text{if } x \geq \text{ 0 } \end{cases} \f$ ) */ 1555 ABS, /**< Absolute ( \f$ f(x)= |x| \f$ ) */ 1556 SQUARE, /**< Square ( \f$ f(x)= x^2 \f$ )*/ 1557 SQRT, /**< Square root ( \f$ f(x) = \sqrt{x} \f$ )*/ 1558 LINEAR, /**< Linear ( \f$ f(x)= ax + b \f$ ) */ 1559 IDENTITY, /**< Identity ( \f$ f(x)= x \f$ ) */ 1560 HARD_SWISH /**< Hard-swish ( \f$ f(x) = (x * relu6(x+3))/6 \f$ ) */ 1561 }; 1562 1563 ActivationLayerInfo() = default; 1564 /** Default Constructor 1565 * 1566 * @param[in] f The activation function to use. 1567 * @param[in] a (Optional) The alpha parameter used by some activation functions 1568 * (@ref ActivationFunction::BOUNDED_RELU, @ref ActivationFunction::LU_BOUNDED_RELU, @ref ActivationFunction::LINEAR, @ref ActivationFunction::TANH). 1569 * @param[in] b (Optional) The beta parameter used by some activation functions (@ref ActivationFunction::LINEAR, @ref ActivationFunction::LU_BOUNDED_RELU, @ref ActivationFunction::TANH). 
1570 */ 1571 ActivationLayerInfo(ActivationFunction f, float a = 0.0f, float b = 0.0f) _act(f)1572 : _act(f), _a(a), _b(b), _enabled(true) 1573 { 1574 } 1575 /** Get the type of activation function */ activation()1576 ActivationFunction activation() const 1577 { 1578 return _act; 1579 } 1580 /** Get the alpha value */ a()1581 float a() const 1582 { 1583 return _a; 1584 } 1585 /** Get the beta value */ b()1586 float b() const 1587 { 1588 return _b; 1589 } 1590 /** Check if initialised */ enabled()1591 bool enabled() const 1592 { 1593 return _enabled; 1594 } 1595 1596 private: 1597 ActivationFunction _act = { ActivationLayerInfo::ActivationFunction::IDENTITY }; 1598 float _a = {}; 1599 float _b = {}; 1600 bool _enabled = { false }; 1601 }; 1602 1603 /** Fully connected layer info */ 1604 struct FullyConnectedLayerInfo 1605 { 1606 DataLayout weights_trained_layout{ DataLayout::NCHW }; /**< Layout that the weights have been trained with. */ 1607 bool transpose_weights{ true }; /**< Transpose weights if true. */ 1608 bool are_weights_reshaped{ false }; /**< Reshape the weights tensor if false. */ 1609 bool retain_internal_weights{ false }; /**< Retain internal reshaped weights. */ 1610 bool fp_mixed_precision{ false }; /**< Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy. */ 1611 ActivationLayerInfo activation_info{}; /**< Fused activation to apply after the matrix multiplication. 
*/ 1612 1613 /** Sets the weights trained data layout 1614 * 1615 * @param[in] layout Data layout that the weights were trained with 1616 * 1617 * @return Updated object 1618 */ set_weights_trained_layoutFullyConnectedLayerInfo1619 FullyConnectedLayerInfo &set_weights_trained_layout(DataLayout layout) 1620 { 1621 weights_trained_layout = layout; 1622 return *this; 1623 } 1624 /** Sets the transpose weights flag 1625 * 1626 * @param[in] should_transpose_weights Boolean flag indicating if weights should be transposed 1627 * 1628 * @return Updated object 1629 */ set_transpose_weightsFullyConnectedLayerInfo1630 FullyConnectedLayerInfo &set_transpose_weights(bool should_transpose_weights) 1631 { 1632 transpose_weights = should_transpose_weights; 1633 return *this; 1634 } 1635 }; 1636 1637 /** Normalization Layer Information class */ 1638 class NormalizationLayerInfo 1639 { 1640 public: 1641 /** Default Constructor 1642 * 1643 * @param[in] type The normalization type. Can be @ref NormType::IN_MAP_1D, @ref NormType::IN_MAP_2D or @ref NormType::CROSS_MAP 1644 * @param[in] norm_size The normalization size is the number of elements to normalize across. Defaults to 5. 1645 * @param[in] alpha (Optional) Alpha parameter used by normalization equation. Defaults to 0.0001. 1646 * @param[in] beta (Optional) Beta parameter used by normalization equation. Defaults to 0.5. 1647 * @param[in] kappa (Optional) Kappa parameter used by [Krichevksy 2012] Across Channel Local Brightness Normalization equation. 1648 * @param[in] is_scaled (Optional) Boolean that specifies if alpha will be scaled by the normalization size or not. 1649 * Should be false to follow [Krichevksy 2012]. 
1650 */ 1651 NormalizationLayerInfo(NormType type, uint32_t norm_size = 5, float alpha = 0.0001f, float beta = 0.5f, float kappa = 1.f, bool is_scaled = true) _type(type)1652 : _type(type), _norm_size(norm_size), _alpha(alpha), _beta(beta), _kappa(kappa), _is_scaled(is_scaled) 1653 { 1654 } 1655 /** Get the normalization type */ type()1656 NormType type() const 1657 { 1658 return _type; 1659 } 1660 /** Get the normalization size */ norm_size()1661 uint32_t norm_size() const 1662 { 1663 return _norm_size; 1664 } 1665 /** Get the alpha value */ alpha()1666 float alpha() const 1667 { 1668 return _alpha; 1669 } 1670 /** Get the beta value */ beta()1671 float beta() const 1672 { 1673 return _beta; 1674 } 1675 /** Get the kappa value */ kappa()1676 float kappa() const 1677 { 1678 return _kappa; 1679 } 1680 /** Get the is_scaled value */ is_scaled()1681 bool is_scaled() const 1682 { 1683 return _is_scaled; 1684 } 1685 /** Check if normalization is cross map */ is_cross_map()1686 bool is_cross_map() const 1687 { 1688 return _type == NormType::CROSS_MAP; 1689 } 1690 /** Check if normalization is not cross map */ is_in_map()1691 bool is_in_map() const 1692 { 1693 return !is_cross_map(); 1694 } 1695 /** Return the scaling factor of the normalization function. 1696 * 1697 * If is_scaled is set to false then [Krichevksy 2012] normalization scaling is performed, 1698 * where alpha is returned plainly, else alpha is scaled by the total number of elements used for the normalization. 1699 * 1700 * @return The normalization scaling factor. 1701 */ scale_coeff()1702 float scale_coeff() const 1703 { 1704 const uint32_t size = (_type == NormType::IN_MAP_2D) ? _norm_size * _norm_size : _norm_size; 1705 return (_is_scaled) ? 
(_alpha / size) : _alpha; 1706 } 1707 1708 private: 1709 NormType _type; 1710 uint32_t _norm_size; 1711 float _alpha; 1712 float _beta; 1713 float _kappa; 1714 bool _is_scaled; 1715 }; 1716 1717 class StridedSliceLayerInfo 1718 { 1719 public: 1720 /** Default Constructor 1721 * 1722 * @param[in] begin_mask (Optional) If the ith bit of begin_mask is set, starts[i] is ignored and the fullest possible range in that dimension is used instead. 1723 * @param[in] end_mask (Optional) If the ith bit of end_mask is set, ends[i] is ignored and the fullest possible range in that dimension is used instead. 1724 * @param[in] shrink_axis_mask (Optional) If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1. 1725 */ 1726 StridedSliceLayerInfo(int32_t begin_mask = 0, int32_t end_mask = 0, int32_t shrink_axis_mask = 0) _begin_mask(begin_mask)1727 : _begin_mask(begin_mask), _end_mask(end_mask), _shrink_axis_mask(shrink_axis_mask) 1728 { 1729 } 1730 1731 /* Get the begin mask value */ begin_mask()1732 int32_t begin_mask() const 1733 { 1734 return _begin_mask; 1735 } 1736 1737 /* Get the end mask value */ end_mask()1738 int32_t end_mask() const 1739 { 1740 return _end_mask; 1741 } 1742 1743 /* Get the shrink axis mask value */ shrink_axis_mask()1744 int32_t shrink_axis_mask() const 1745 { 1746 return _shrink_axis_mask; 1747 } 1748 1749 private: 1750 int32_t _begin_mask; 1751 int32_t _end_mask; 1752 int32_t _shrink_axis_mask; 1753 }; 1754 1755 /** Convolution Layer Weights Information class. 
This class stores the necessary information to compute convolution layer when the weights are already reshaped */
class WeightsInfo
{
public:
    /** Default constructor */
    WeightsInfo()
        : _are_reshaped(false), _kernel_width(0), _kernel_height(0), _num_kernels(0), _retain_internal_weights(false)
    {
    }
    /** Constructor
     *
     * @param[in] are_reshaped            True if the weights have been reshaped
     * @param[in] kernel_width            Kernel width.
     * @param[in] kernel_height           Kernel height.
     * @param[in] num_kernels             Number of convolution kernels.
     * @param[in] retain_internal_weights (Optional) True if internal reshaped weights must be retained. Used for reconfiguration purposes. Default is false.
     */
    WeightsInfo(bool are_reshaped, unsigned int kernel_width, unsigned int kernel_height, unsigned int num_kernels, bool retain_internal_weights = false)
        : _are_reshaped(are_reshaped),
          _kernel_width(kernel_width),
          _kernel_height(kernel_height),
          _num_kernels(num_kernels),
          _retain_internal_weights(retain_internal_weights)
    {
    }
    /** Flag which specifies if the weights tensor has been reshaped.
     *
     * @return True if the weights tensors has been reshaped
     */
    bool are_reshaped() const
    {
        return _are_reshaped;
    }
    /** Return the number of convolution kernels
     *
     * @return The number of convolution kernels
     */
    unsigned int num_kernels() const
    {
        return _num_kernels;
    }
    /** Return the width and height of the kernel
     *
     * @return The width and height of the kernel, as a (width, height) pair
     */
    std::pair<unsigned int, unsigned int> kernel_size() const
    {
        return std::make_pair(_kernel_width, _kernel_height);
    }
    /** Flag which specifies if the internal reshaped weights must be retained
     *
     * @return True if the internal reshaped weights must be retained
     */
    bool retain_internal_weights() const
    {
        return _retain_internal_weights;
    }

private:
    bool         _are_reshaped;
    unsigned int _kernel_width;
    unsigned int _kernel_height;
    unsigned int _num_kernels;
    bool         _retain_internal_weights;
};

/** GEMM reshape information class. This class stores the necessary information about matrix A and matrix B reshape.
1814 * 1815 * The matrix A can only be reshaped through @ref CLGEMMReshapeLHSMatrixKernel or @ref NEGEMMInterleave4x4Kernel or @ref GCGEMMInterleave4x4Kernel 1816 * Note: Optionally just for @ref CLGEMMReshapeLHSMatrixKernel is it possible to set mult_interleave4x4_height, the multiplication factor for the height of the 4x4 interleaved block 1817 * 1818 * The matrix B can only be reshaped through @ref CLGEMMReshapeRHSMatrixKernel or @ref NEGEMMTranspose1xWKernel or @ref GCGEMMTranspose1xWKernel 1819 * Note: Optionally just for @ref CLGEMMReshapeRHSMatrixKernel is it possible to set mult_transpose1xW_width, the multiplication factor for the width of the 1xW transposed block 1820 * 1821 */ 1822 class GEMMReshapeInfo final 1823 { 1824 public: 1825 /** Default constructor */ GEMMReshapeInfo()1826 GEMMReshapeInfo() 1827 : _m(1), _n(1), _k(1), _mult_transpose1xW_width(1), _mult_interleave4x4_height(1), _depth_output_gemm3d(0), _reinterpret_input_as_3d(false), _broadcast_bias(false) 1828 { 1829 } 1830 /** Constructor 1831 * 1832 * @param[in] m Number of matrix A rows 1833 * @param[in] n Number of matrix B columns 1834 * @param[in] k Number of matrix A columns or matrix B rows 1835 * @param[in] mult_transpose1xW_width (Optional) Multiplication factor for the width of the 1xW transposed block 1836 * @param[in] mult_interleave4x4_height (Optional) Multiplication factor for the height of the 4x4 interleaved block 1837 * @param[in] depth_output_gemm3d (Optional) Depth (third dimension) of the output tensor to be used with the GEMM3D kernel. 1838 * If 0 the output will not be reinterpreted as 3D. Default 0 1839 * @param[in] reinterpret_input_as_3d (Optional) Reinterpret the input as 3D tensor. (i.e. this flag should be set to true when GEMM is used 1840 * to perform 1x1 convolutions with the NHWC data layout) 1841 * @param[in] broadcast_bias (Optional) Broadcast the shape of the bias tensor from a vector to a matrix. 
1842 */ 1843 GEMMReshapeInfo(int m, int n, int k, int mult_transpose1xW_width = 1, int mult_interleave4x4_height = 1, int depth_output_gemm3d = 0, bool reinterpret_input_as_3d = false, bool broadcast_bias = false) _m(m)1844 : _m(m), _n(n), _k(k), _mult_transpose1xW_width(mult_transpose1xW_width), _mult_interleave4x4_height(mult_interleave4x4_height), _depth_output_gemm3d(depth_output_gemm3d), 1845 _reinterpret_input_as_3d(reinterpret_input_as_3d), _broadcast_bias(broadcast_bias) 1846 { 1847 } 1848 /** Number of matrix A rows 1849 * 1850 * @return the number of matrix A rows 1851 */ m()1852 int m() const 1853 { 1854 return _m; 1855 } 1856 /** Number of matrix B columns 1857 * 1858 * @return the number of matrix B columns 1859 */ n()1860 int n() const 1861 { 1862 return _n; 1863 } 1864 /** Number of matrix A columns or matrix B rows 1865 * 1866 * @return the number of matrix A columns or matrix B rows 1867 */ k()1868 int k() const 1869 { 1870 return _k; 1871 } 1872 /** Multiplication factor for the width of the 1xW transposed block 1873 * 1874 * @return the multiplication factor for the width of the 1xW transposed block 1875 */ mult_transpose1xW_width()1876 int mult_transpose1xW_width() const 1877 { 1878 return _mult_transpose1xW_width; 1879 } 1880 /** Multiplication factor for the height of the 4x4 interleaved block 1881 * 1882 * @return the multiplication factor for the height of the 4x4 interleaved block 1883 */ mult_interleave4x4_height()1884 int mult_interleave4x4_height() const 1885 { 1886 return _mult_interleave4x4_height; 1887 } 1888 /** Depth (third dimension) of the output tensor to be used with the GEMM3D kernel 1889 * 1890 * @note GEMM3D kernel is used when the output has to be reinterpret as 3D tensor. 
In that case: 1891 * m = depth_output_gemm3d * output_height 1892 * 1893 * @return the depth of the output tensor to be used with the GEMM3D kernel 1894 */ depth_output_gemm3d()1895 int depth_output_gemm3d() const 1896 { 1897 return _depth_output_gemm3d; 1898 } 1899 /** Flag which specifies if the input tensor has to be reinterpreted as 3D 1900 * 1901 * @return True if the input tensor has to be reinterpreted as 3D tensor 1902 */ reinterpret_input_as_3d()1903 bool reinterpret_input_as_3d() const 1904 { 1905 return _reinterpret_input_as_3d; 1906 }; 1907 /** Flag which specifies whether to broadcast the shape of the bias tensor. 1908 * 1909 * @return True if the shape of the bias tensor is to be broadcasted. 1910 */ broadcast_bias()1911 bool broadcast_bias() const 1912 { 1913 return _broadcast_bias; 1914 }; 1915 1916 private: 1917 int _m; 1918 int _n; 1919 int _k; 1920 int _mult_transpose1xW_width; 1921 int _mult_interleave4x4_height; 1922 int _depth_output_gemm3d; 1923 bool _reinterpret_input_as_3d; 1924 bool _broadcast_bias; 1925 }; 1926 1927 struct DepthwiseConvolutionReshapeInfo 1928 { 1929 unsigned int c0{ 1 }; /**< Number of channels processed by the depth-wise convolution */ 1930 bool transpose{ false }; /**< True if the block MxC0 (where M is the area of the filter i.e. 
KwxKh) has to be transposed */ 1931 }; 1932 1933 /** GEMMLowp output stage type */ 1934 enum class GEMMLowpOutputStageType 1935 { 1936 NONE, /**< No quantization */ 1937 QUANTIZE_DOWN, /**< Quantize using an integer multiplication */ 1938 QUANTIZE_DOWN_FIXEDPOINT, /**< Quantize using a fixed point multiplication */ 1939 QUANTIZE_DOWN_FLOAT /**< Quantize using a floating point multiplication */ 1940 }; 1941 1942 /** GEMMLowp output stage info */ 1943 struct GEMMLowpOutputStageInfo 1944 { 1945 GEMMLowpOutputStageType type{ GEMMLowpOutputStageType::NONE }; /**< GEMMLowp output stage type */ 1946 int32_t gemmlowp_offset{ 0 }; /**< GEMMLowp output stage offset used for quantizing to QASYMM8 */ 1947 int32_t gemmlowp_multiplier{ 0 }; /**< GEMMLowp output stage multiplier used for quantizing to QASYMM8 */ 1948 int32_t gemmlowp_shift{ 0 }; /**< GEMMLowp output stage shift used for quantizing to uint8 */ 1949 int32_t gemmlowp_min_bound{ std::numeric_limits<int32_t>::lowest() }; /**< GEMMLowp min value used to saturate down the output result before converting back to QASYMM8 */ 1950 int32_t gemmlowp_max_bound{ std::numeric_limits<int32_t>::max() }; /**< GEMMLowp max value used to saturate down the output result before converting back to QASYMM8 */ 1951 std::vector<int32_t> gemmlowp_multipliers{}; /**< GEMMLowp output stage multiplier used for quantizing to QASYMM8 */ 1952 std::vector<int32_t> gemmlowp_shifts{}; /**< GEMMLowp output stage multiplier used for quantizing to QASYMM8 */ 1953 float gemmlowp_real_multiplier{ 0 }; /**< GEMMLowp output stage real multiplier used for quantizing to QASYMM8 */ 1954 bool is_quantized_per_channel{ false }; /**< GEMMLowp quantized per-channel flag */ 1955 DataType output_data_type{ DataType::UNKNOWN }; /**< Output tensor data type to use if the output is not initialized */ 1956 }; 1957 1958 /** GEMM LHS (Left Hand Side) matrix information */ 1959 struct GEMMLHSMatrixInfo 1960 { 1961 GEMMLHSMatrixInfo() = default; 
GEMMLHSMatrixInfoGEMMLHSMatrixInfo1962 GEMMLHSMatrixInfo(unsigned int m, unsigned int k, unsigned int v, bool trans, bool inter) 1963 : m0(m), k0(k), v0(v), transpose(trans), interleave(inter) 1964 { 1965 } 1966 unsigned int m0{ 1 }; /**< Number of rows processed by the matrix multiplication */ 1967 unsigned int k0{ 1 }; /**< Number of partial accumulations performed by the matrix multiplication */ 1968 unsigned int v0{ 1 }; /**< Number of vertical blocks of size (m0xk0) stored on the same output row */ 1969 bool transpose{ true }; /**< True if the (m0xk0) block has to be transposed before been stored */ 1970 bool interleave{ true }; /**< True if the v0 (m0xk0) blocks have to be interleaved in the output row */ 1971 }; 1972 1973 /** GEMM RHS (Right Hand Side) matrix information */ 1974 struct GEMMRHSMatrixInfo 1975 { 1976 GEMMRHSMatrixInfo() = default; GEMMRHSMatrixInfoGEMMRHSMatrixInfo1977 GEMMRHSMatrixInfo(unsigned int n, unsigned int k, unsigned int h, bool trans, bool inter, bool export_to_cl_img) 1978 : n0(n), k0(k), h0(h), transpose(trans), interleave(inter), export_to_cl_image(export_to_cl_img) 1979 { 1980 } 1981 unsigned int n0{ 1 }; /**< Number of columns processed by the matrix multiplication */ 1982 unsigned int k0{ 1 }; /**< Number of partial accumulations performed by the matrix multiplication */ 1983 unsigned int h0{ 1 }; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row */ 1984 bool transpose{ true }; /**< True if the (k0xn0) block has to be transposed before been stored */ 1985 bool interleave{ true }; /**< True if the h0 (k0xn0) blocks have to be interleaved in the output row */ 1986 bool export_to_cl_image{ false }; /**< True if the reshaped rhs has to be exported to cl_image. n0 must be equal to 4 */ 1987 }; 1988 1989 /** GEMM information class. 
This class stores the necessary information to compute GEMM functions 1990 * 1991 * This object also contains the information about how matrix A and matrix B have been reshaped 1992 * 1993 */ 1994 class GEMMInfo 1995 { 1996 public: 1997 /** Default constructor */ GEMMInfo()1998 GEMMInfo() noexcept 1999 : _is_a_reshaped(false), 2000 _is_b_reshaped(false), 2001 _reshape_b_only_on_first_run(true), 2002 _depth_output_gemm3d(0), 2003 _reinterpret_input_as_3d(false), 2004 _retain_internal_weights(false), 2005 _gemmlowp_output_stage(), 2006 _fp_mixed_precision(false), 2007 _broadcast_bias(false), 2008 _pretranpose_B(true), 2009 _activation_info() 2010 { 2011 } 2012 /** Constructor 2013 * 2014 * @param[in] is_a_reshaped True if the matrix A has been reshaped 2015 * @param[in] is_b_reshaped True if the matrix B has been reshaped 2016 * @param[in] reshape_b_only_on_first_run Reshape matrix B only for the first run 2017 * @param[in] depth_output_gemm3d (Optional) Depth (third dimension) of the output tensor to be used with the GEMM3D kernel 2018 * If 0 the output will not be reinterpreted as 3D. Default 0 2019 * @param[in] reinterpret_input_as_3d (Optional) Reinterpret the input as 3D tensor. (i.e. this flag should be set to true when GEMM is used 2020 * to perform 1x1 convolutions with the NHWC data layout) 2021 * @param[in] retain_internal_weights (Optional) Retain the weights tensor from previous run 2022 * @param[in] gemmlowp_output_stage (Optional) GEMMLowp Output stage info 2023 * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy. 2024 * @param[in] broadcast_bias (Optional) Broadcast the shape of the bias tensor from a vector to a matrix. 
2025 * @param[in] activation_info (Optional) Activation to apply after the matrix multiplication 2026 */ 2027 GEMMInfo(bool is_a_reshaped, bool is_b_reshaped, bool reshape_b_only_on_first_run, int depth_output_gemm3d = 0, bool reinterpret_input_as_3d = false, bool retain_internal_weights = false, 2028 GEMMLowpOutputStageInfo gemmlowp_output_stage = GEMMLowpOutputStageInfo(), bool fp_mixed_precision = false, bool broadcast_bias = false, 2029 const ActivationLayerInfo &activation_info = ActivationLayerInfo()) noexcept _is_a_reshaped(is_a_reshaped)2030 : _is_a_reshaped(is_a_reshaped), 2031 _is_b_reshaped(is_b_reshaped), 2032 _reshape_b_only_on_first_run(reshape_b_only_on_first_run), 2033 _depth_output_gemm3d(depth_output_gemm3d), 2034 _reinterpret_input_as_3d(reinterpret_input_as_3d), 2035 _retain_internal_weights(retain_internal_weights), 2036 _gemmlowp_output_stage(gemmlowp_output_stage), 2037 _fp_mixed_precision(fp_mixed_precision), 2038 _broadcast_bias(broadcast_bias), 2039 _pretranpose_B(reshape_b_only_on_first_run), 2040 _activation_info(activation_info) 2041 { 2042 } 2043 /** Flag which specifies if the matrix A has been reshaped 2044 * 2045 * @return True if the matrix A has been reshaped 2046 */ is_a_reshaped()2047 bool is_a_reshaped() const 2048 { 2049 return _is_a_reshaped; 2050 }; 2051 /** Flag which specifies if the matrix B has been reshaped 2052 * 2053 * @return True if the matrix B has been reshaped 2054 */ is_b_reshaped()2055 bool is_b_reshaped() const 2056 { 2057 return _is_b_reshaped; 2058 }; 2059 /** Flag which specifies if the reshape of matrix B should executed only for the first 2060 * 2061 * @note This flag could be set to TRUE when GEMM is used to accelerate convolution layer 2062 * 2063 * @return True if the reshaped of matrix B happens only for the first run 2064 */ reshape_b_only_on_first_run()2065 bool reshape_b_only_on_first_run() const 2066 { 2067 return _reshape_b_only_on_first_run; 2068 }; 2069 /** Depth of the output when GEMM output 
is reinterpreted as 3D tensor 2070 * 2071 * @return the depth of the output tensor 2072 */ depth_output_gemm3d()2073 int depth_output_gemm3d() const 2074 { 2075 return _depth_output_gemm3d; 2076 }; 2077 /** Flag which specifies if the input tensor has to be reinterpreted as 3D 2078 * 2079 * @return True if the input tensor has to be reinterpreted as 3D tensor 2080 */ reinterpret_input_as_3d()2081 bool reinterpret_input_as_3d() const 2082 { 2083 return _reinterpret_input_as_3d; 2084 }; 2085 /** Flag which specifies if the weights tensor has to be retained from previous run 2086 * 2087 * @return True if the weights tensor has to be retained 2088 */ retain_internal_weights()2089 bool retain_internal_weights() const 2090 { 2091 return _retain_internal_weights; 2092 }; 2093 /** GEMMLowp output stage 2094 * 2095 * @return the GEMMLowp output stage info 2096 */ gemmlowp_output_stage()2097 GEMMLowpOutputStageInfo gemmlowp_output_stage() const 2098 { 2099 return _gemmlowp_output_stage; 2100 }; 2101 /** Sets GEMMLowp output stage 2102 * 2103 * @param[in] output_stage Output stage to set 2104 */ set_gemmlowp_output_stage(GEMMLowpOutputStageInfo & output_stage)2105 void set_gemmlowp_output_stage(GEMMLowpOutputStageInfo &output_stage) 2106 { 2107 _gemmlowp_output_stage = output_stage; 2108 }; 2109 /** Flag which specifies if a wider accumulator should be used. 2110 * 2111 * @return True if a wider accumulator has to be used 2112 */ fp_mixed_precision()2113 bool fp_mixed_precision() const 2114 { 2115 return _fp_mixed_precision; 2116 }; 2117 /** Flag which specifies whether to broadcast the shape of the bias tensor. 2118 * 2119 * @return True if the shape of the bias tensor is to be broadcasted. 2120 */ broadcast_bias()2121 bool broadcast_bias() const 2122 { 2123 return _broadcast_bias; 2124 }; 2125 /** Flag which specifies whether b should be pre-transposed if supported. 2126 * 2127 * @return True if b should be pre-transposed else false. 
2128 */ pretranpose_B()2129 bool pretranpose_B() const 2130 { 2131 return _pretranpose_B; 2132 }; 2133 /** Set pre-transpose b flag 2134 * 2135 * @param[in] flag Flag to set 2136 */ set_pretranpose_B(bool flag)2137 void set_pretranpose_B(bool flag) 2138 { 2139 _pretranpose_B = flag; 2140 } 2141 /** Activation layer to apply after the matrix multiplication 2142 * 2143 * @return ActivationLayerInfo object 2144 */ activation_info()2145 ActivationLayerInfo activation_info() const 2146 { 2147 return _activation_info; 2148 } 2149 /** Set activation layer info 2150 * 2151 * @param[in] activation_info ActivationLayerInfo object to set 2152 */ set_activation_info(const ActivationLayerInfo & activation_info)2153 void set_activation_info(const ActivationLayerInfo &activation_info) 2154 { 2155 _activation_info = activation_info; 2156 } 2157 2158 private: 2159 bool _is_a_reshaped; 2160 bool _is_b_reshaped; 2161 bool _reshape_b_only_on_first_run; 2162 int _depth_output_gemm3d; 2163 bool _reinterpret_input_as_3d; 2164 bool _retain_internal_weights; 2165 GEMMLowpOutputStageInfo _gemmlowp_output_stage; 2166 bool _fp_mixed_precision; 2167 bool _broadcast_bias; 2168 bool _pretranpose_B; 2169 ActivationLayerInfo _activation_info; 2170 }; 2171 2172 /** Winograd information */ 2173 struct WinogradInfo 2174 { 2175 /** Default constructor 2176 * 2177 * @param[in] output_tile_sz Width and height of the output tile 2178 * @param[in] kernel_sz Width and height of the kernel 2179 * @param[in] input_dims Width and height of the input tensor before the convolution is applied 2180 * @param[in] conv_info Convolution info (Pads, strides) 2181 * @param[in] data_layout Data layout to use for the output tensor once the convolution has been applied 2182 */ WinogradInfoWinogradInfo2183 WinogradInfo(Size2D output_tile_sz, Size2D kernel_sz, Size2D input_dims, PadStrideInfo conv_info, DataLayout data_layout) 2184 : output_tile_size(output_tile_sz), kernel_size(kernel_sz), input_dimensions(input_dims), 
convolution_info(conv_info), output_data_layout(data_layout) 2185 { 2186 } 2187 2188 Size2D output_tile_size{}; /**< Width and height of the output tile */ 2189 Size2D kernel_size{}; /**< Width and height of the kernel*/ 2190 Size2D input_dimensions{}; /**< Width and height of the input tensor before the convolution is applied */ 2191 PadStrideInfo convolution_info{}; /**< Convolution info (Pads, strides,...) */ 2192 DataLayout output_data_layout{ DataLayout::NCHW }; /**< Data layout to use for the output tensor once the convolution has been applied (NCHW or NHWC) */ 2193 }; 2194 2195 /** IO formatting information class*/ 2196 struct IOFormatInfo 2197 { 2198 /** Precision type used when printing floating point numbers */ 2199 enum class PrecisionType 2200 { 2201 Default, /**< Default precision to the one that the current stream has */ 2202 Custom, /**< Custom precision specified by the user using the precision parameter */ 2203 Full /**< The maximum precision of the floating point representation */ 2204 }; 2205 2206 /** Specifies the area to be printed, used by Tensor objects */ 2207 enum class PrintRegion 2208 { 2209 ValidRegion, /**< Prints the valid region of the Tensor object */ 2210 NoPadding, /**< Prints the Tensor object without the padding */ 2211 Full /**< Print the tensor object including padding */ 2212 }; 2213 2214 /** Construct a set of IO formatting information. 2215 * 2216 * @param[in] print_region Area to be printed. Used by Tensor objects. Default: ValidRegion. 2217 * @param[in] precision_type Precision type for floating point numbers. Default: stream default. 2218 * @param[in] precision Precision value for float point numbers. Default: 10. 2219 * @param[in] align_columns Whether to align columns when printed. Default: true. 2220 * @param[in] element_delim Delimeter between elements. Default: " ". 2221 * @param[in] row_delim Delimenter between rows. Default: "\n". 
2222 */ 2223 IOFormatInfo(PrintRegion print_region = PrintRegion::ValidRegion, 2224 PrecisionType precision_type = PrecisionType::Default, 2225 unsigned int precision = 10, 2226 bool align_columns = true, 2227 std::string element_delim = " ", 2228 std::string row_delim = "\n") print_regionIOFormatInfo2229 : print_region(print_region), 2230 precision_type(precision_type), 2231 precision(precision), 2232 element_delim(element_delim), 2233 row_delim(row_delim), 2234 align_columns(align_columns) 2235 { 2236 } 2237 2238 /** Area to be printed by Tensor objects */ 2239 PrintRegion print_region; 2240 /** Floating point precision type */ 2241 PrecisionType precision_type; 2242 /** Floating point precision */ 2243 unsigned int precision; 2244 /** Element delimeter */ 2245 std::string element_delim; 2246 /** Row delimeter */ 2247 std::string row_delim; 2248 /** Align columns */ 2249 bool align_columns; 2250 }; 2251 2252 /** Internal keypoint class for Lucas-Kanade Optical Flow */ 2253 struct NELKInternalKeypoint 2254 { 2255 float x{ 0.f }; /**< x coordinate of the keypoint */ 2256 float y{ 0.f }; /**< y coordinate of the keypoint */ 2257 bool tracking_status{ false }; /**< the tracking status of the keypoint */ 2258 }; 2259 2260 } // namespace arm_compute 2261 #endif /* ARM_COMPUTE_TYPES_H */ 2262