1 /*
2  * Copyright (c) 2016-2022 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef ARM_COMPUTE_TYPES_H
25 #define ARM_COMPUTE_TYPES_H
26 
#include "arm_compute/core/Coordinates.h"
#include "arm_compute/core/QuantizationInfo.h"
#include "arm_compute/core/Size2D.h"
#include "arm_compute/core/Size3D.h"
#include "arm_compute/core/Strides.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/experimental/IPostOp.h"
#include "arm_compute/core/utils/misc/Macros.h"
#include "support/Bfloat16.h"
#include "support/Half.h"

#include <algorithm>
#include <array>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <map>
#include <string>
#include <utility>
#include <vector>
44 
45 namespace arm_compute
46 {
47 /** 16-bit floating point type */
48 using half = half_float::half;
49 
50 /** Permutation vector */
51 using PermutationVector = Strides;
52 /** Bidirectional strides */
53 using BiStrides = Coordinates;
54 
/** Image colour formats */
enum class Format
{
    UNKNOWN,  /**< Unknown image format */
    U8,       /**< 1 channel, 1 U8 per channel */
    S16,      /**< 1 channel, 1 S16 per channel */
    U16,      /**< 1 channel, 1 U16 per channel */
    S32,      /**< 1 channel, 1 S32 per channel */
    U32,      /**< 1 channel, 1 U32 per channel */
    BFLOAT16, /**< 16-bit brain floating-point number */
    F16,      /**< 1 channel, 1 F16 per channel */
    F32,      /**< 1 channel, 1 F32 per channel */
    UV88,     /**< 2 channels, 1 U8 per channel */
    RGB888,   /**< 3 channels, 1 U8 per channel */
    RGBA8888, /**< 4 channels, 1 U8 per channel */
    YUV444,   /**< A 3 plane of 8 bit 4:4:4 sampled Y, U, V planes */
    YUYV422,  /**< A single plane of 32-bit macro pixel of Y0, U0, Y1, V0 bytes */
    NV12,     /**< A 2 plane YUV format of Luma (Y) and interleaved UV data at 4:2:0 sampling */
    NV21,     /**< A 2 plane YUV format of Luma (Y) and interleaved VU data at 4:2:0 sampling */
    IYUV,     /**< A 3 plane of 8-bit 4:2:0 sampled Y, U, V planes */
    UYVY422   /**< A single plane of 32-bit macro pixel of U0, Y0, V0, Y1 bytes */
};
77 
/** Available data types */
enum class DataType
{
    UNKNOWN,            /**< Unknown data type */
    U8,                 /**< unsigned 8-bit number */
    S8,                 /**< signed 8-bit number */
    QSYMM8,             /**< quantized, symmetric fixed-point 8-bit number */
    QASYMM8,            /**< quantized, asymmetric fixed-point 8-bit number unsigned */
    QASYMM8_SIGNED,     /**< quantized, asymmetric fixed-point 8-bit number signed */
    QSYMM8_PER_CHANNEL, /**< quantized, symmetric per channel fixed-point 8-bit number */
    U16,                /**< unsigned 16-bit number */
    S16,                /**< signed 16-bit number */
    QSYMM16,            /**< quantized, symmetric fixed-point 16-bit number */
    QASYMM16,           /**< quantized, asymmetric fixed-point 16-bit number */
    U32,                /**< unsigned 32-bit number */
    S32,                /**< signed 32-bit number */
    U64,                /**< unsigned 64-bit number */
    S64,                /**< signed 64-bit number */
    BFLOAT16,           /**< 16-bit brain floating-point number */
    F16,                /**< 16-bit floating-point number */
    F32,                /**< 32-bit floating-point number */
    F64,                /**< 64-bit floating-point number */
    SIZET               /**< size_t */
};
102 
/** Available Sampling Policies */
enum class SamplingPolicy
{
    CENTER,  /**< Samples are taken at pixel center */
    TOP_LEFT /**< Samples are taken at pixel top left corner */
};
109 
/** [DataLayout enum definition] **/

/** Supported tensor data layouts */
enum class DataLayout
{
    UNKNOWN, /**< Unknown data layout */
    NCHW,    /**< Num samples, channels, height, width */
    NHWC,    /**< Num samples, height, width, channels */
    NCDHW,   /**< Num samples, channels, depth, height, width */
    NDHWC    /**< Num samples, depth, height, width, channels */
};
/** [DataLayout enum definition] **/
122 
/** Supported tensor data layout dimensions */
enum class DataLayoutDimension
{
    CHANNEL, /**< channel */
    HEIGHT,  /**< height */
    WIDTH,   /**< width */
    DEPTH,   /**< depth */
    BATCHES  /**< batches */
};
132 
/** Available ConvolutionMethod */
enum class ConvolutionMethod
{
    GEMM,        /**< Convolution using GEMM */
    GEMM_CONV2D, /**< Direct 2D GEMM convolution */
    DIRECT,      /**< Direct convolution */
    INDIRECT,    /**< Indirect convolution */
    WINOGRAD,    /**< Convolution using Winograd */
    FFT          /**< Convolution using FFT */
};
143 
/** Available DepthwiseConvolutionFunction */
enum class DepthwiseConvolutionFunction
{
    OPTIMIZED, /**< Optimized Depthwise Convolution */
    GENERIC,   /**< Generic Depthwise Convolution */
};
150 
/** Available DeconvolutionMethod */
enum class DeconvolutionMethod
{
    GEMM,          /**< Deconvolution using GEMM */
    DIRECT,        /**< Direct deconvolution */
    UPSCALE_CONV2D /**< Deconvolution with Upscaling */
};
158 
/** Available FuseBatchNormalizationType */
enum class FuseBatchNormalizationType
{
    CONVOLUTION,         /**< For Convolution weights */
    DEPTHWISECONVOLUTION /**< For Depthwise Convolution weights */
};
165 
/** Padding mode to use for PadLayer */
enum class PaddingMode
{
    CONSTANT, /**< Constant padding (NOTE(review): presumably fills the padded area with a fixed value; confirm against PadLayer) */
    REFLECT,  /**< Reflect padding (NOTE(review): exact edge handling is not visible here; confirm against PadLayer) */
    SYMMETRIC /**< Symmetric padding (NOTE(review): exact edge handling is not visible here; confirm against PadLayer) */
};
173 
/** Supported comparison operations */
enum class ComparisonOperation
{
    Equal,        /**< Equal comparison ( \f$ x == y \f$ ) */
    NotEqual,     /**< NotEqual comparison ( \f$ x != y \f$ ) */
    Greater,      /**< Greater comparison ( \f$ x > y \f$ ) */
    GreaterEqual, /**< Greater equal comparison ( \f$ x >= y \f$ ) */
    Less,         /**< Less comparison ( \f$ x < y \f$ ) */
    LessEqual     /**< Less equal comparison ( \f$ x <= y \f$ ) */
};
184 
185 /** Container for valid region of a window */
186 struct ValidRegion
187 {
188     /** Default constructor */
ValidRegionValidRegion189     ValidRegion()
190         : anchor{}, shape{}
191     {
192     }
193 
194     /** Allow instances of this class to be copy constructed */
195     ValidRegion(const ValidRegion &) = default;
196     /** Allow instances of this class to be move constructed */
197     ValidRegion(ValidRegion &&) = default;
198     /** Allow instances of this class to be copied */
199     ValidRegion &operator=(const ValidRegion &) = default;
200     /** Allow instances of this class to be moved */
201     ValidRegion &operator=(ValidRegion &&) = default;
202     /** Default destructor */
203     ~ValidRegion() = default;
204 
205     /** Constructor for a valid region with default number of dimensions
206      *
207      * @param[in] an_anchor Anchor for the start of the valid region.
208      * @param[in] a_shape   Shape of the valid region.
209      *
210      */
ValidRegionValidRegion211     ValidRegion(const Coordinates &an_anchor, const TensorShape &a_shape)
212         : anchor{ an_anchor }, shape{ a_shape }
213     {
214         anchor.set_num_dimensions(std::max(anchor.num_dimensions(), shape.num_dimensions()));
215     }
216 
217     /** Constructor for a valid region with specified number of dimensions
218      *
219      * @param[in] an_anchor      Anchor for the start of the valid region.
220      * @param[in] a_shape        Shape of the valid region.
221      * @param[in] num_dimensions Number of dimensions (must be >= number of dimensions of anchor and shape).
222      *
223      */
ValidRegionValidRegion224     ValidRegion(const Coordinates &an_anchor, const TensorShape &a_shape, size_t num_dimensions)
225         : anchor{ an_anchor }, shape{ a_shape }
226     {
227         ARM_COMPUTE_ERROR_ON(num_dimensions < std::max(anchor.num_dimensions(), shape.num_dimensions()));
228         anchor.set_num_dimensions(num_dimensions);
229     }
230 
231     /** Return the start of the valid region for the given dimension @p d */
startValidRegion232     int start(unsigned int d) const
233     {
234         return anchor[d];
235     }
236 
237     /** Return the end of the valid region for the given dimension @p d */
endValidRegion238     int end(unsigned int d) const
239     {
240         return anchor[d] + shape[d];
241     }
242 
243     /** Accessor to set the value of anchor and shape for one of the dimensions.
244      *
245      * @param[in] dimension Dimension for which the value is set.
246      * @param[in] start     Value to be set in anchor for the dimension.
247      * @param[in] size      Value to be set in shape for the dimension.
248      *
249      * @return *this.
250      */
setValidRegion251     ValidRegion &set(size_t dimension, int start, size_t size)
252     {
253         anchor.set(dimension, start);
254         shape.set(dimension, size);
255         return *this;
256     }
257 
258     /** Check whether two valid regions are equal.
259      *
260      * @param[in] lhs LHS valid region
261      * @param[in] rhs RHS valid region
262      *
263      * @return True if the valid regions are the same.
264      */
265     inline friend bool operator==(const ValidRegion &lhs, const ValidRegion &rhs);
266 
267     Coordinates anchor; /**< Anchor for the start of the valid region. */
268     TensorShape shape;  /**< Shape of the valid region. */
269 };
270 inline bool operator==(const ValidRegion &lhs, const ValidRegion &rhs)
271 {
272     return (lhs.anchor == rhs.anchor) && (lhs.shape == rhs.shape);
273 }
274 
/** Methods available to handle borders */
enum class BorderMode
{
    UNDEFINED, /**< Borders are left undefined */
    CONSTANT,  /**< Pixels outside the image are assumed to have a constant value */
    REPLICATE  /**< Pixels outside the image are assumed to have the same value as the closest image pixel */
};
282 
/** Container for 2D border size */
struct BorderSize
{
    /** Empty border, i.e. no border */
    constexpr BorderSize() noexcept
        : top{ 0 },
          right{ 0 },
          bottom{ 0 },
          left{ 0 }
    {
    }

    /** Border with equal size around the 2D plane
     *
     * @param[in] size Border size applied to all four sides.
     */
    explicit constexpr BorderSize(unsigned int size) noexcept
        : top{ size },
          right{ size },
          bottom{ size },
          left{ size }
    {
    }

    /** Border with same size for top/bottom and left/right
     *
     * @param[in] top_bottom Border size for the top and the bottom sides.
     * @param[in] left_right Border size for the left and the right sides.
     */
    constexpr BorderSize(unsigned int top_bottom, unsigned int left_right) noexcept
        : top{ top_bottom }, right{ left_right }, bottom{ top_bottom }, left{ left_right }
    {
    }

    /** Border with different sizes
     *
     * @param[in] top    Border size for the top side.
     * @param[in] right  Border size for the right side.
     * @param[in] bottom Border size for the bottom side.
     * @param[in] left   Border size for the left side.
     */
    constexpr BorderSize(unsigned int top, unsigned int right, unsigned int bottom, unsigned int left) noexcept
        : top{ top }, right{ right }, bottom{ bottom }, left{ left }
    {
    }

    /** Check if the entire border is zero */
    constexpr bool empty() const
    {
        return top == 0 && right == 0 && bottom == 0 && left == 0;
    }

    /** Check if the border is the same size on all sides */
    constexpr bool uniform() const
    {
        return top == right && top == bottom && top == left;
    }

    /** Scale this border size.
     *
     * Each side is multiplied in floating point and then truncated back to unsigned int.
     *
     * @note @p scale is expected to be non-negative: converting a negative product
     *       to unsigned int is undefined behaviour in C++.
     *
     * @param[in] scale Scale to multiply border size by.
     *
     * @return *this.
     */
    BorderSize &operator*=(float scale)
    {
        // Explicit casts spell out the conversions the compound assignment performed implicitly.
        top    = static_cast<unsigned int>(static_cast<float>(top) * scale);
        right  = static_cast<unsigned int>(static_cast<float>(right) * scale);
        bottom = static_cast<unsigned int>(static_cast<float>(bottom) * scale);
        left   = static_cast<unsigned int>(static_cast<float>(left) * scale);

        return *this;
    }

    /** Scale a copy of this border size.
     *
     * This object is left unchanged, so the operator can be applied to const borders.
     *
     * @param[in] scale Scale to multiply border size by.
     *
     * @return a scaled copy of this.
     */
    BorderSize operator*(float scale) const
    {
        BorderSize size = *this;
        size *= scale;

        return size;
    }

    /** Check equality with another BorderSize struct
     *
     * @param[in] rhs other struct to check against
     *
     * @return true if they are equal
     */
    bool operator==(const BorderSize &rhs) const
    {
        return (top == rhs.top) && (right == rhs.right) && (bottom == rhs.bottom) && (left == rhs.left);
    }

    /** Check non-equality with another BorderSize struct
     *
     * @param[in] rhs other struct to check against
     *
     * @return true if they are different
     */
    bool operator!=(const BorderSize &rhs) const
    {
        return !(*this == rhs);
    }

    /** Limit this border size.
     *
     * Each side is clamped to the corresponding side of @p limit (element-wise minimum).
     *
     * @param[in] limit Border size to limit this border size to.
     */
    void limit(const BorderSize &limit)
    {
        top    = std::min(top, limit.top);
        right  = std::min(right, limit.right);
        bottom = std::min(bottom, limit.bottom);
        left   = std::min(left, limit.left);
    }

    unsigned int top;    /**< top of the border */
    unsigned int right;  /**< right of the border */
    unsigned int bottom; /**< bottom of the border */
    unsigned int left;   /**< left of the border */
};
397 
398 /** Container for 2D padding size */
399 using PaddingSize = BorderSize;
400 
/** Policy to handle integer overflow
 *  @note: This is ignored by floating point operations where the overflow behavior adheres to the IEEE-754 standard
 *         which states that in case of overflow ±infinity is returned for the round-to-nearest modes (and follows the
 *         rounding rules for the directed rounding modes) by default.
 */
enum class ConvertPolicy
{
    WRAP,    /**< Wrap around */
    SATURATE /**< Saturate */
};
411 
/** Interpolation method */
enum class InterpolationPolicy
{
    NEAREST_NEIGHBOR, /**< Output values are defined to match the source pixel whose center is nearest to the sample position */
    BILINEAR,         /**< Output values are defined by bilinear interpolation between the pixels */
    AREA,             /**< Output values are determined by averaging the source pixels whose areas fall under the area of the destination pixel, projected onto the source image */
};
419 
/** Bilinear Interpolation method used by LKTracker */
enum class BilinearInterpolation
{
    BILINEAR_OLD_NEW, /**< Old-new method */
    BILINEAR_SCHARR   /**< Scharr method */
};
426 
/** Rectangle type, described by its top-left corner and its size */
struct Rectangle
{
    uint16_t x;      /**< Top-left x coordinate */
    uint16_t y;      /**< Top-left y coordinate */
    uint16_t width;  /**< Width of the rectangle */
    uint16_t height; /**< Height of the rectangle */
};
435 
/** 2D coordinate type (signed 32-bit components) */
struct Coordinates2D
{
    int32_t x; /**< X coordinate */
    int32_t y; /**< Y coordinate */
};
442 
/** 3D coordinate type (unsigned 32-bit components) */
struct Coordinates3D
{
    uint32_t x; /**< X coordinate */
    uint32_t y; /**< Y coordinate */
    uint32_t z; /**< Z coordinate */
};
450 
/** Padding information as a pair of unsigned int start/end */
using PaddingInfo = std::pair<uint32_t, uint32_t>;

/** List of padding information */
using PaddingList = std::vector<PaddingInfo>;

/** Information to produce a tiled version of a Tensor */
using Multiples = std::vector<uint32_t>;
459 
/** Available channels */
enum class Channel
{
    UNKNOWN, /**< Unknown channel format */
    C0,      /**< First channel (used by formats with unknown channel types). */
    C1,      /**< Second channel (used by formats with unknown channel types). */
    C2,      /**< Third channel (used by formats with unknown channel types). */
    C3,      /**< Fourth channel (used by formats with unknown channel types). */
    R,       /**< Red channel. */
    G,       /**< Green channel. */
    B,       /**< Blue channel. */
    A,       /**< Alpha channel. */
    Y,       /**< Luma channel. */
    U,       /**< Cb/U channel. */
    V        /**< Cr/V/Value channel. */
};
476 
/** Available reduction operations */
enum class ReductionOperation
{
    ARG_IDX_MAX, /**< Index of the max value */
    ARG_IDX_MIN, /**< Index of the min value */
    MEAN_SUM,    /**< Mean of sum */
    PROD,        /**< Product */
    SUM_SQUARE,  /**< Sum of squares */
    SUM,         /**< Sum */
    MIN,         /**< Min */
    MAX,         /**< Max */
};
489 
/** Available element-wise operations */
enum class ArithmeticOperation
{
    ADD,          /**< (x + y) */
    SUB,          /**< (x - y) */
    DIV,          /**< (x / y) */
    MIN,          /**< Min(x, y) */
    MAX,          /**< Max(x, y) */
    SQUARED_DIFF, /**< (x - y)^2 */
    POWER,        /**< x ^ y */
    PRELU,        /**< y*x if x < 0, x otherwise */
};
502 
/** Available element wise unary operations */
enum class ElementWiseUnary
{
    RSQRT,       /**< Reverse square root (NOTE(review): presumably the reciprocal square root, 1/sqrt(x); confirm) */
    EXP,         /**< Exponential */
    NEG,         /**< Negate */
    LOG,         /**< Natural Logarithm */
    ABS,         /**< Absolute value */
    SIN,         /**< Sine */
    ROUND,       /**< Round */
    LOGICAL_NOT, /**< Logical Not */
};
515 
/** Available bitwise operations */
enum class BitwiseOperation
{
    AND, /**< Bitwise AND operation */
    NOT, /**< Bitwise NOT operation */
    OR,  /**< Bitwise OR operation */
    XOR, /**< Bitwise XOR operation */
};
524 
/** The normalization type used for the normalization layer */
enum class NormType
{
    IN_MAP_1D, /**< Normalization applied within the same map in 1D region */
    IN_MAP_2D, /**< Normalization applied within the same map in 2D region */
    CROSS_MAP  /**< Normalization applied cross maps */
};
532 
/** Detection window used for the object detection. The detection window keeps the following information:
 *
 *  -# Geometry of the rectangular window (x/y of top-left corner and width/height)
 *  -# Index of the class used for evaluating which class the detection window belongs to
 *  -# Confidence value (score) obtained with the classifier
 *
 * All members are zero-initialised by default.
 */
struct DetectionWindow
{
    uint16_t x{ 0 };         /**< Top-left x coordinate */
    uint16_t y{ 0 };         /**< Top-left y coordinate */
    uint16_t width{ 0 };     /**< Width of the detection window */
    uint16_t height{ 0 };    /**< Height of the detection window */
    uint16_t idx_class{ 0 }; /**< Index of the class */
    float    score{ 0.f };   /**< Confidence value for the detection window */
};
548 
/** Dimension rounding type when down-scaling on CNNs
 * @note Used in pooling and convolution layer
 */
enum class DimensionRoundingType
{
    FLOOR, /**< Floor rounding */
    CEIL   /**< Ceil rounding */
};
557 
/** Available pooling types */
enum class PoolingType
{
    MAX, /**< Max Pooling */
    AVG, /**< Average Pooling */
    L2   /**< L2 Pooling */
};
565 
/** Available non maxima suppression types */
enum class NMSType
{
    LINEAR,   /**< Linear NMS */
    GAUSSIAN, /**< Gaussian NMS */
    ORIGINAL  /**< Original NMS */
};
573 
574 /** BoxWithNonMaximaSuppressionLimit Information class */
575 class BoxNMSLimitInfo final
576 {
577 public:
578     /** Constructor
579      *
580      * @param[in] score_thresh             (Optional) Score threshold.
581      * @param[in] nms                      (Optional) NMS value
582      * @param[in] detections               (Optional) Number of detections
583      * @param[in] soft_nms_enabled         (Optional) Enable SoftNMS
584      * @param[in] soft_nms_method          (Optional) Soft NMS method
585      * @param[in] soft_nms_sigma           (Optional) Soft NMS sigma value
586      * @param[in] soft_nms_min_score_thres (Optional) Soft NMS minimum score threshold
587      * @param[in] suppress_size            (Optional) Filter out boxes based on their size. Defaults to false
588      * @param[in] min_size                 (Optional) Smaller boxes than min_size will be filtered out. Defaults to 1
589      * @param[in] im_width                 (Optional) Boxes whose centers (on the x axis) is beyond im_width will be filtered. Defaults to 1
590      * @param[in] im_height                (Optional) Boxes whose centers (on the y axis) is beyond im_height will be filtered. Defaults to 1
591      */
592     BoxNMSLimitInfo(float score_thresh = 0.05f, float nms = 0.3f,
593                     int detections = 100, bool soft_nms_enabled = false,
594                     NMSType soft_nms_method = NMSType::LINEAR,
595                     float soft_nms_sigma = 0.5f, float soft_nms_min_score_thres = 0.001f, bool suppress_size = false, float min_size = 1.0f, float im_width = 1.0f, float im_height = 1.0f)
_score_thresh(score_thresh)596         : _score_thresh(score_thresh), _nms(nms), _detections_per_im(detections), _soft_nms_enabled(soft_nms_enabled), _soft_nms_method(soft_nms_method), _soft_nms_sigma(soft_nms_sigma),
597           _soft_nms_min_score_thres(soft_nms_min_score_thres), _suppress_size(suppress_size), _min_size(min_size), _im_width(im_width), _im_height(im_height)
598     {
599     }
600     /** Get the score threshold */
score_thresh()601     float score_thresh() const
602     {
603         return _score_thresh;
604     }
605     /** Get the NMS */
nms()606     float nms() const
607     {
608         return _nms;
609     }
610     /** Get the number of detections */
detections_per_im()611     int detections_per_im() const
612     {
613         return _detections_per_im;
614     }
615     /** Check if soft NMS is enabled */
soft_nms_enabled()616     bool soft_nms_enabled() const
617     {
618         return _soft_nms_enabled;
619     }
620     /** Get soft NMS method */
soft_nms_method()621     NMSType soft_nms_method() const
622     {
623         return _soft_nms_method;
624     }
625     /** Get soft NMS sigma */
soft_nms_sigma()626     float soft_nms_sigma() const
627     {
628         return _soft_nms_sigma;
629     }
630     /** Get soft nms min score threshold */
soft_nms_min_score_thres()631     float soft_nms_min_score_thres() const
632     {
633         return _soft_nms_min_score_thres;
634     }
635     /** Get if NMS will suppress boxes based on their size/position */
suppress_size()636     bool suppress_size() const
637     {
638         return _suppress_size;
639     }
640     /** Get size suppression threshold */
min_size()641     float min_size() const
642     {
643         return _min_size;
644     }
645     /** Get image width (NMS may suppress boxes whose center sits beyond the image width) */
im_width()646     float im_width() const
647     {
648         return _im_width;
649     }
650     /** Get image height (NMS may suppress boxes whose center sits beyond the image height) */
im_height()651     float im_height() const
652     {
653         return _im_height;
654     }
655 
656 private:
657     float   _score_thresh;
658     float   _nms;
659     int     _detections_per_im;
660     bool    _soft_nms_enabled;
661     NMSType _soft_nms_method;
662     float   _soft_nms_sigma;
663     float   _soft_nms_min_score_thres;
664     bool    _suppress_size;
665     float   _min_size;
666     float   _im_width;
667     float   _im_height;
668 };
669 
670 /** Padding and stride information class */
671 class PadStrideInfo
672 {
673 public:
674     /** Constructor
675      *
676      * @param[in] stride_x (Optional) Stride, in elements, across x. Defaults to 1.
677      * @param[in] stride_y (Optional) Stride, in elements, across y. Defaults to 1.
678      * @param[in] pad_x    (Optional) Padding, in elements, across x. Defaults to 0.
679      * @param[in] pad_y    (Optional) Padding, in elements, across y. Defaults to 0.
680      * @param[in] round    (Optional) Dimensions rounding. Defaults to @ref FLOOR.
681      */
682     PadStrideInfo(unsigned int stride_x = 1, unsigned int stride_y = 1,
683                   unsigned int pad_x = 0, unsigned int pad_y = 0,
684                   DimensionRoundingType round = DimensionRoundingType::FLOOR)
_stride(std::make_pair (stride_x,stride_y))685         : _stride(std::make_pair(stride_x, stride_y)),
686           _pad_left(pad_x),
687           _pad_top(pad_y),
688           _pad_right(pad_x),
689           _pad_bottom(pad_y),
690           _round_type(round)
691     {
692     }
693     /** Constructor
694      *
695      * @param[in] stride_x   Stride, in elements, across x.
696      * @param[in] stride_y   Stride, in elements, across y.
697      * @param[in] pad_left   Padding across x on the left, in elements.
698      * @param[in] pad_right  Padding across x on the right, in elements.
699      * @param[in] pad_top    Padding across y on the top, in elements.
700      * @param[in] pad_bottom Padding across y on the bottom, in elements.
701      * @param[in] round      Dimensions rounding.
702      */
PadStrideInfo(unsigned int stride_x,unsigned int stride_y,unsigned int pad_left,unsigned int pad_right,unsigned int pad_top,unsigned int pad_bottom,DimensionRoundingType round)703     PadStrideInfo(unsigned int stride_x, unsigned int stride_y,
704                   unsigned int pad_left, unsigned int pad_right,
705                   unsigned int pad_top, unsigned int pad_bottom,
706                   DimensionRoundingType round)
707         : _stride(std::make_pair(stride_x, stride_y)),
708           _pad_left(pad_left),
709           _pad_top(pad_top),
710           _pad_right(pad_right),
711           _pad_bottom(pad_bottom),
712           _round_type(round)
713     {
714     }
715     /** Get the stride.
716      *
717      * @return a pair: stride x, stride y.
718      */
stride()719     std::pair<unsigned int, unsigned int> stride() const
720     {
721         return _stride;
722     }
723     /** Check whether the padding is symmetric.
724      *
725      * @return True if the padding is symmetric.
726      */
padding_is_symmetric()727     bool padding_is_symmetric() const
728     {
729         return (_pad_left == _pad_right) && (_pad_top == _pad_bottom);
730     }
731     /** Get the padding.
732      *
733      * @note This should only be used when the padding is symmetric.
734      *
735      * @return a pair: padding left/right, padding top/bottom
736      */
pad()737     std::pair<unsigned int, unsigned int> pad() const
738     {
739         //this accessor should be used only when padding is symmetric
740         ARM_COMPUTE_ERROR_ON(!padding_is_symmetric());
741         return std::make_pair(_pad_left, _pad_top);
742     }
743 
744     /** Get the left padding */
pad_left()745     unsigned int pad_left() const
746     {
747         return _pad_left;
748     }
749     /** Get the right padding */
pad_right()750     unsigned int pad_right() const
751     {
752         return _pad_right;
753     }
754     /** Get the top padding */
pad_top()755     unsigned int pad_top() const
756     {
757         return _pad_top;
758     }
759     /** Get the bottom padding */
pad_bottom()760     unsigned int pad_bottom() const
761     {
762         return _pad_bottom;
763     }
764 
765     /** Get the rounding type */
round()766     DimensionRoundingType round() const
767     {
768         return _round_type;
769     }
770 
771     /** Check whether this has any padding */
has_padding()772     bool has_padding() const
773     {
774         return (_pad_left != 0 || _pad_top != 0 || _pad_right != 0 || _pad_bottom != 0);
775     }
776 
777 private:
778     std::pair<unsigned int, unsigned int> _stride;
779     unsigned int _pad_left;
780     unsigned int _pad_top;
781     unsigned int _pad_right;
782     unsigned int _pad_bottom;
783 
784     DimensionRoundingType _round_type;
785 };
786 
/** Padding information for 2D operations like Conv2d */
struct Padding2D
{
    /** Default constructor: all paddings are zero */
    Padding2D() = default;
    /** Constructor with independent padding per side
     *
     * @param[in] left   Padding across the width dimension on the left, in elements.
     * @param[in] right  Padding across the width dimension on the right, in elements.
     * @param[in] top    Padding across the height dimension on the top, in elements.
     * @param[in] bottom Padding across the height dimension on the bottom, in elements.
     */
    Padding2D(size_t left, size_t right, size_t top, size_t bottom)
        : left(left), right(right), top(top), bottom(bottom)
    {
    }
    size_t left   = { 0 }; /**<  Padding across the width dimension on the left, in elements. */
    size_t right  = { 0 }; /**<  Padding across the width dimension on the right, in elements. */
    size_t top    = { 0 }; /**<  Padding across the height dimension on the top, in elements. */
    size_t bottom = { 0 }; /**<  Padding across the height dimension on the bottom, in elements. */
};
800 
/** Padding information for 3D operations like Conv3d */
struct Padding3D
{
    /** Default constructor: all paddings are zero */
    Padding3D() noexcept
    {
    }

    /** Constructor with symmetric padding per axis
     *
     * @param[in] pad_x Padding applied on both the left and the right, in elements.
     * @param[in] pad_y Padding applied on both the top and the bottom, in elements.
     * @param[in] pad_z Padding applied on both the front and the back, in elements.
     */
    Padding3D(size_t pad_x, size_t pad_y, size_t pad_z)
        : left(pad_x), right(pad_x), top(pad_y), bottom(pad_y), front(pad_z), back(pad_z)
    {
    }

    /** Constructor with independent padding per side
     *
     * @param[in] left   Padding across the width dimension on the left, in elements.
     * @param[in] right  Padding across the width dimension on the right, in elements.
     * @param[in] top    Padding across the height dimension on the top, in elements.
     * @param[in] bottom Padding across the height dimension on the bottom, in elements.
     * @param[in] front  Padding across the depth dimension on the front, in elements.
     * @param[in] back   Padding across the depth dimension on the back, in elements.
     */
    Padding3D(size_t left, size_t right, size_t top, size_t bottom, size_t front, size_t back)
        : left(left), right(right), top(top), bottom(bottom), front(front), back(back)
    {
    }

    size_t left   = { 0 }; /**<  Padding across the width dimension on the left, in elements. */
    size_t right  = { 0 }; /**<  Padding across the width dimension on the right, in elements. */
    size_t top    = { 0 }; /**<  Padding across the height dimension on the top, in elements. */
    size_t bottom = { 0 }; /**<  Padding across the height dimension on the bottom, in elements. */
    size_t front  = { 0 }; /**<  Padding across the depth dimension on the front, in elements. */
    size_t back   = { 0 }; /**<  Padding across the depth dimension on the back, in elements. */
};
825 
826 /** PriorBox layer info */
827 class PriorBoxLayerInfo final
828 {
829 public:
830     /** Default Constructor */
PriorBoxLayerInfo()831     PriorBoxLayerInfo()
832         : _min_sizes(),
833           _variances(),
834           _offset(),
835           _flip(true),
836           _clip(false),
837           _max_sizes(),
838           _aspect_ratios(),
839           _img_size(),
840           _steps()
841     {
842     }
843     /** Constructor
844      *
845      * @param[in] min_sizes     Min sizes vector.
846      * @param[in] variances     Variances vector.
847      * @param[in] offset        Offset value.
848      * @param[in] flip          (Optional) Flip the aspect ratios.
849      * @param[in] clip          (Optional) Clip coordinates so that they're within [0,1].
850      * @param[in] max_sizes     (Optional) Max sizes vector.
851      * @param[in] aspect_ratios (Optional) Aspect ratios of the boxes.
852      * @param[in] img_size      (Optional) Image size.
853      * @param[in] steps         (Optional) Step values.
854      */
855     PriorBoxLayerInfo(const std::vector<float> &min_sizes, const std::vector<float> &variances, float offset, bool flip = true, bool clip = false,
856                       const std::vector<float> &max_sizes = {}, const std::vector<float> &aspect_ratios = {},
857     const Coordinates2D &img_size = Coordinates2D{ 0, 0 }, const std::array<float, 2> &steps = { { 0.f, 0.f } })
_min_sizes(min_sizes)858         : _min_sizes(min_sizes),
859           _variances(variances),
860           _offset(offset),
861           _flip(flip),
862           _clip(clip),
863           _max_sizes(max_sizes),
864           _aspect_ratios(),
865           _img_size(img_size),
866           _steps(steps)
867     {
868         _aspect_ratios.push_back(1.);
869         for(unsigned int i = 0; i < aspect_ratios.size(); ++i)
870         {
871             float ar            = aspect_ratios[i];
872             bool  already_exist = false;
873             for(auto ar_new : _aspect_ratios)
874             {
875                 if(fabs(ar - ar_new) < 1e-6)
876                 {
877                     already_exist = true;
878                     break;
879                 }
880             }
881             if(!already_exist)
882             {
883                 _aspect_ratios.push_back(ar);
884                 if(flip)
885                 {
886                     _aspect_ratios.push_back(1.f / ar);
887                 }
888             }
889         }
890     }
891     /** Get min sizes. */
min_sizes()892     std::vector<float> min_sizes() const
893     {
894         return _min_sizes;
895     }
896     /** Get min variances. */
variances()897     std::vector<float> variances() const
898     {
899         return _variances;
900     }
901     /** Get the step coordinates */
steps()902     std::array<float, 2> steps() const
903     {
904         return _steps;
905     }
906     /** Get the image size coordinates */
img_size()907     Coordinates2D img_size() const
908     {
909         return _img_size;
910     }
911     /** Get the offset */
offset()912     float offset() const
913     {
914         return _offset;
915     }
916     /** Get the flip value */
flip()917     bool flip() const
918     {
919         return _flip;
920     }
921     /** Get the clip value */
clip()922     bool clip() const
923     {
924         return _clip;
925     }
926     /** Get max sizes. */
max_sizes()927     std::vector<float> max_sizes() const
928     {
929         return _max_sizes;
930     }
931     /** Get aspect ratios. */
aspect_ratios()932     std::vector<float> aspect_ratios() const
933     {
934         return _aspect_ratios;
935     }
936 
937 private:
938     std::vector<float> _min_sizes;
939     std::vector<float> _variances;
940     float              _offset;
941     bool               _flip;
942     bool               _clip;
943     std::vector<float> _max_sizes;
944     std::vector<float> _aspect_ratios;
945     Coordinates2D      _img_size;
946     std::array<float, 2> _steps;
947 };
948 
/** Bounding box stored as four floats in the order [xmin, ymin, xmax, ymax] */
using BBox = std::array<float, 4>;
/** Map from an integer label to the bounding boxes associated with that label */
using LabelBBox = std::map<int, std::vector<BBox>>;
953 
/** Available Detection Output code types */
enum class DetectionOutputLayerCodeType
{
    CORNER,      /**< Use box corners */
    CENTER_SIZE, /**< Use box centers and size */
    CORNER_SIZE, /**< Use box corners and size */
    TF_CENTER    /**< Use box centers and size but flip x and y co-ordinates */
};
962 
963 /** Detection Output layer info */
964 class DetectionOutputLayerInfo final
965 {
966 public:
967     /** Default Constructor */
DetectionOutputLayerInfo()968     DetectionOutputLayerInfo()
969         : _num_classes(),
970           _share_location(),
971           _code_type(DetectionOutputLayerCodeType::CORNER),
972           _keep_top_k(),
973           _nms_threshold(),
974           _top_k(),
975           _background_label_id(),
976           _confidence_threshold(),
977           _variance_encoded_in_target(false),
978           _eta(),
979           _num_loc_classes()
980     {
981         _num_loc_classes = _share_location ? 1 : _num_classes;
982     }
983     /** Constructor
984      *
985      * @param[in] num_classes                Number of classes to be predicted.
986      * @param[in] share_location             If true, bounding box are shared among different classes.
987      * @param[in] code_type                  Type of coding method for bbox.
988      * @param[in] keep_top_k                 Number of total bounding boxes to be kept per image after NMS step.
989      * @param[in] nms_threshold              Threshold to be used in NMS.
990      * @param[in] top_k                      (Optional) Number of boxes per image with top confidence scores that are fed into the NMS algorithm. Default set to -1.
991      * @param[in] background_label_id        (Optional) Background label ID. If there is no background class, set it as -1.
992      * @param[in] confidence_threshold       (Optional) Only consider detections whose confidences are larger than a threshold. Default set to -FLT_MAX.
993      * @param[in] variance_encoded_in_target (Optional) If true, variance is encoded in target. Otherwise we need to adjust the predicted offset accordingly.Default set to false.
994      * @param[in] eta                        (Optional) Eta.
995      */
996     DetectionOutputLayerInfo(int num_classes, bool share_location, DetectionOutputLayerCodeType code_type, int keep_top_k, float nms_threshold, int top_k = -1, int background_label_id = -1,
997                              float confidence_threshold = std::numeric_limits<float>::lowest(), bool variance_encoded_in_target = false, float eta = 1)
_num_classes(num_classes)998         : _num_classes(num_classes),
999           _share_location(share_location),
1000           _code_type(code_type),
1001           _keep_top_k(keep_top_k),
1002           _nms_threshold(nms_threshold),
1003           _top_k(top_k),
1004           _background_label_id(background_label_id),
1005           _confidence_threshold(confidence_threshold),
1006           _variance_encoded_in_target(variance_encoded_in_target),
1007           _eta(eta),
1008           _num_loc_classes()
1009     {
1010         _num_loc_classes = _share_location ? 1 : _num_classes;
1011     }
1012     /** Get num classes. */
num_classes()1013     int num_classes() const
1014     {
1015         return _num_classes;
1016     }
1017     /** Get share location. */
share_location()1018     bool share_location() const
1019     {
1020         return _share_location;
1021     }
1022     /** Get detection output code type. */
code_type()1023     DetectionOutputLayerCodeType code_type() const
1024     {
1025         return _code_type;
1026     }
1027     /** Get if variance encoded in target. */
variance_encoded_in_target()1028     bool variance_encoded_in_target() const
1029     {
1030         return _variance_encoded_in_target;
1031     }
1032     /** Get the number of total bounding boxes to be kept per image. */
keep_top_k()1033     int keep_top_k() const
1034     {
1035         return _keep_top_k;
1036     }
1037     /** Get nms threshold. */
nms_threshold()1038     float nms_threshold() const
1039     {
1040         return _nms_threshold;
1041     }
1042     /** Get eta. */
eta()1043     float eta() const
1044     {
1045         return _eta;
1046     }
1047     /** Get background label ID. */
background_label_id()1048     int background_label_id() const
1049     {
1050         return _background_label_id;
1051     }
1052     /** Get confidence threshold. */
confidence_threshold()1053     float confidence_threshold() const
1054     {
1055         return _confidence_threshold;
1056     }
1057     /** Get top K. */
top_k()1058     int top_k() const
1059     {
1060         return _top_k;
1061     }
1062     /** Get number of location classes. */
num_loc_classes()1063     int num_loc_classes() const
1064     {
1065         return _num_loc_classes;
1066     }
1067 
1068 private:
1069     int                          _num_classes;
1070     bool                         _share_location;
1071     DetectionOutputLayerCodeType _code_type;
1072     int                          _keep_top_k;
1073     float                        _nms_threshold;
1074     int                          _top_k;
1075     int                          _background_label_id;
1076     float                        _confidence_threshold;
1077     bool                         _variance_encoded_in_target;
1078     float                        _eta;
1079     int                          _num_loc_classes;
1080 };
1081 
/** Detection Post Process layer info */
class DetectionPostProcessLayerInfo final
{
public:
    /** Default Constructor
     *
     * Every numeric member is zero and every flag is false.
     */
    DetectionPostProcessLayerInfo()
        : DetectionPostProcessLayerInfo(0U, 0U, 0.f, 0.f, 0U, std::array<float, 4>(), false, 0U, false)
    {
    }
    /** Constructor
     *
     * @param[in] max_detections            Number of total detection.
     * @param[in] max_classes_per_detection Number of total classes to be kept after NMS step. Used in the Fast Non-Max-Suppression
     * @param[in] nms_score_threshold       Threshold to be used in NMS
     * @param[in] iou_threshold             Threshold to be used during the intersection over union.
     * @param[in] num_classes               Number of classes.
     * @param[in] scales_values             Scales values used for decode center size boxes, ordered as [y, x, h, w].
     * @param[in] use_regular_nms           (Optional) Boolean to determinate if use regular or fast nms. Defaults to false.
     * @param[in] detection_per_class       (Optional) Number of detection per class. Used in the Regular Non-Max-Suppression. Defaults to 100.
     * @param[in] dequantize_scores         (Optional) If the scores need to be dequantized. Defaults to true.
     */
    DetectionPostProcessLayerInfo(unsigned int max_detections, unsigned int max_classes_per_detection, float nms_score_threshold, float iou_threshold, unsigned int num_classes,
                                  std::array<float, 4> scales_values, bool use_regular_nms = false, unsigned int detection_per_class = 100, bool dequantize_scores = true)
        : _max_detections(max_detections),
          _max_classes_per_detection(max_classes_per_detection),
          _nms_score_threshold(nms_score_threshold),
          _iou_threshold(iou_threshold),
          _num_classes(num_classes),
          _scales_values(scales_values),
          _use_regular_nms(use_regular_nms),
          _detection_per_class(detection_per_class),
          _dequantize_scores(dequantize_scores)
    {
    }
    /** Get max detections. */
    unsigned int max_detections() const
    {
        return _max_detections;
    }
    /** Get max_classes per detection. Used in the Fast Non-Max-Suppression.*/
    unsigned int max_classes_per_detection() const
    {
        return _max_classes_per_detection;
    }
    /** Get detection per class. Used in the Regular Non-Max-Suppression */
    unsigned int detection_per_class() const
    {
        return _detection_per_class;
    }
    /** Get nms score threshold. */
    float nms_score_threshold() const
    {
        return _nms_score_threshold;
    }
    /** Get intersection over union threshold. */
    float iou_threshold() const
    {
        return _iou_threshold;
    }
    /** Get num classes. */
    unsigned int num_classes() const
    {
        return _num_classes;
    }
    /** Get if use regular nms. */
    bool use_regular_nms() const
    {
        return _use_regular_nms;
    }
    /** Get y scale value (element 0 of the [y,x,h,w] scales). */
    float scale_value_y() const
    {
        return std::get<0>(_scales_values);
    }
    /** Get x scale value (element 1 of the [y,x,h,w] scales). */
    float scale_value_x() const
    {
        return std::get<1>(_scales_values);
    }
    /** Get h scale value (element 2 of the [y,x,h,w] scales). */
    float scale_value_h() const
    {
        return std::get<2>(_scales_values);
    }
    /** Get w scale value (element 3 of the [y,x,h,w] scales). */
    float scale_value_w() const
    {
        return std::get<3>(_scales_values);
    }
    /** Get dequantize_scores value. */
    bool dequantize_scores() const
    {
        return _dequantize_scores;
    }

private:
    unsigned int         _max_detections;
    unsigned int         _max_classes_per_detection;
    float                _nms_score_threshold;
    float                _iou_threshold;
    unsigned int         _num_classes;
    std::array<float, 4> _scales_values; // Saved as [y,x,h,w]
    bool                 _use_regular_nms;
    unsigned int         _detection_per_class;
    bool                 _dequantize_scores;
};
1200 
1201 /** Pooling Layer Information struct*/
1202 struct PoolingLayerInfo
1203 {
1204     /** Default Constructor */
PoolingLayerInfoPoolingLayerInfo1205     PoolingLayerInfo()
1206         : pool_type(PoolingType::MAX),
1207           pool_size(Size2D()),
1208           data_layout(DataLayout::UNKNOWN),
1209           pad_stride_info(PadStrideInfo()),
1210           exclude_padding(false),
1211           is_global_pooling(false),
1212           fp_mixed_precision(false)
1213     {
1214     }
1215     /** Constructor
1216      *
1217      * @param[in] pool_type          Pooling type @ref PoolingType.
1218      * @param[in] pool_size          Pooling size, in elements, across  x and y.
1219      * @param[in] data_layout        Data layout used by the layer @ref DataLayout
1220      * @param[in] pad_stride_info    (Optional) Padding and stride information @ref PadStrideInfo
1221      * @param[in] exclude_padding    (Optional) Strategy when accounting padding in calculations.
1222      *                               True will exclude padding while false will not (Used in AVG/L2 pooling to determine the pooling area).
1223      *                               Defaults to false;
1224      * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy.
1225      */
1226     explicit PoolingLayerInfo(PoolingType   pool_type,
1227                               unsigned int  pool_size,
1228                               DataLayout    data_layout,
1229                               PadStrideInfo pad_stride_info    = PadStrideInfo(),
1230                               bool          exclude_padding    = false,
1231                               bool          fp_mixed_precision = false)
pool_typePoolingLayerInfo1232         : pool_type(pool_type),
1233           pool_size(Size2D(pool_size, pool_size)),
1234           data_layout(data_layout),
1235           pad_stride_info(pad_stride_info),
1236           exclude_padding(exclude_padding),
1237           is_global_pooling(false),
1238           fp_mixed_precision(fp_mixed_precision)
1239     {
1240     }
1241 
1242     /** Constructor
1243      *
1244      * @param[in] pool_type          Pooling type @ref PoolingType.
1245      * @param[in] pool_size          Pooling size, in elements, across  x and y.
1246      * @param[in] data_layout        Data layout used by the layer @ref DataLayout
1247      * @param[in] pad_stride_info    (Optional) Padding and stride information @ref PadStrideInfo
1248      * @param[in] exclude_padding    (Optional) Strategy when accounting padding in calculations.
1249      *                               True will exclude padding while false will not (Used in AVG/L2 pooling to determine the pooling area).
1250      *                               Defaults to false;
1251      * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy.
1252      */
1253     explicit PoolingLayerInfo(PoolingType   pool_type,
1254                               Size2D        pool_size,
1255                               DataLayout    data_layout,
1256                               PadStrideInfo pad_stride_info    = PadStrideInfo(),
1257                               bool          exclude_padding    = false,
1258                               bool          fp_mixed_precision = false)
pool_typePoolingLayerInfo1259         : pool_type(pool_type),
1260           pool_size(pool_size),
1261           data_layout(data_layout),
1262           pad_stride_info(pad_stride_info),
1263           exclude_padding(exclude_padding),
1264           is_global_pooling(false),
1265           fp_mixed_precision(fp_mixed_precision)
1266     {
1267     }
1268 
1269     /** Constructor
1270      *
1271      * @note This constructor is used for global pooling
1272      *
1273      * @param[in] pool_type   Pooling type @ref PoolingType.
1274      * @param[in] data_layout Data layout used by the layer @ref DataLayout
1275      */
PoolingLayerInfoPoolingLayerInfo1276     explicit PoolingLayerInfo(PoolingType pool_type, DataLayout data_layout)
1277         : pool_type(pool_type),
1278           pool_size(Size2D()),
1279           data_layout(data_layout),
1280           pad_stride_info(PadStrideInfo(1, 1, 0, 0)),
1281           exclude_padding(false),
1282           is_global_pooling(true),
1283           fp_mixed_precision(false)
1284     {
1285     }
1286 
1287     PoolingType   pool_type;
1288     Size2D        pool_size;
1289     DataLayout    data_layout;
1290     PadStrideInfo pad_stride_info;
1291     bool          exclude_padding;
1292     bool          is_global_pooling;
1293     bool          fp_mixed_precision;
1294 };
1295 
1296 /** Pooling Layer Information struct*/
1297 struct Pooling3dLayerInfo
1298 {
1299     /** Default Constructor */
Pooling3dLayerInfoPooling3dLayerInfo1300     Pooling3dLayerInfo() noexcept
1301         : pool_type(PoolingType::MAX),
1302           pool_size(Size3D()),
1303           stride(Size3D()),
1304           padding(Padding3D()),
1305           exclude_padding(false),
1306           is_global_pooling(false),
1307           fp_mixed_precision(false),
1308           round_type(DimensionRoundingType::FLOOR)
1309     {
1310     }
1311     /** Constructor
1312      *
1313      * @param[in] pool_type          Pooling type @ref PoolingType.
1314      * @param[in] pool_size          Pooling size, in elements, across x, y and z.
1315      * @param[in] stride             (Optional) stride information @ref Size3D
1316      * @param[in] padding            (Optional) padding information @ref Padding3D
1317      * @param[in] exclude_padding    (Optional) Strategy when accounting padding in calculations.
1318      *                               True will exclude padding while false will not (Used in AVG/L2 pooling to determine the pooling area).
1319      *                               Defaults to false;
1320      * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy.
1321      * @param[in] round_type         (Optional) Dimensions rounding. Defaults to @ref FLOOR
1322      */
1323     explicit Pooling3dLayerInfo(PoolingType           pool_type,
1324                                 unsigned int          pool_size,
1325                                 Size3D                stride             = Size3D(1U, 1U, 1U),
1326                                 Padding3D             padding            = Padding3D(),
1327                                 bool                  exclude_padding    = false,
1328                                 bool                  fp_mixed_precision = false,
1329                                 DimensionRoundingType round_type         = DimensionRoundingType::FLOOR)
pool_typePooling3dLayerInfo1330         : pool_type(pool_type),
1331           pool_size(Size3D(pool_size, pool_size, pool_size)),
1332           stride(stride),
1333           padding(padding),
1334           exclude_padding(exclude_padding),
1335           is_global_pooling(false),
1336           fp_mixed_precision(fp_mixed_precision),
1337           round_type(round_type)
1338     {
1339     }
1340 
1341     /** Constructor
1342      *
1343      * @param[in] pool_type          Pooling type @ref PoolingType.
1344      * @param[in] pool_size          Pooling size, in elements, across  x, y and z.
1345      * @param[in] stride             (Optional) stride information @ref Size3D
1346      * @param[in] padding            (Optional) padding information @ref Padding3D
1347      * @param[in] exclude_padding    (Optional) Strategy when accounting padding in calculations.
1348      *                               True will exclude padding while false will not (Used in AVG/L2 pooling to determine the pooling area).
1349      *                               Defaults to false;
1350      * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy.
1351      * @param[in] round_type         (Optional) Dimensions rounding. Defaults to @ref FLOOR
1352      */
1353     explicit Pooling3dLayerInfo(PoolingType           pool_type,
1354                                 Size3D                pool_size,
1355                                 Size3D                stride             = Size3D(1U, 1U, 1U),
1356                                 Padding3D             padding            = Padding3D(),
1357                                 bool                  exclude_padding    = false,
1358                                 bool                  fp_mixed_precision = false,
1359                                 DimensionRoundingType round_type         = DimensionRoundingType::FLOOR)
pool_typePooling3dLayerInfo1360         : pool_type(pool_type),
1361           pool_size(pool_size),
1362           stride(stride),
1363           padding(padding),
1364           exclude_padding(exclude_padding),
1365           is_global_pooling(false),
1366           fp_mixed_precision(fp_mixed_precision),
1367           round_type(round_type)
1368     {
1369     }
1370 
1371     /** Constructor
1372      *
1373      * @note This constructor is used for global pooling
1374      *
1375      * @param[in] pool_type Pooling type @ref PoolingType.
1376      */
Pooling3dLayerInfoPooling3dLayerInfo1377     explicit Pooling3dLayerInfo(PoolingType pool_type)
1378         : pool_type(pool_type),
1379           pool_size(Size3D()),
1380           stride(Size3D(1U, 1U, 1U)),
1381           padding(Padding3D(0, 0, 0)),
1382           exclude_padding(false),
1383           is_global_pooling(true),
1384           fp_mixed_precision(false),
1385           round_type(DimensionRoundingType::FLOOR)
1386     {
1387     }
1388 
1389     PoolingType           pool_type;
1390     Size3D                pool_size;
1391     Size3D                stride;
1392     Padding3D             padding;
1393     bool                  exclude_padding;
1394     bool                  is_global_pooling;
1395     bool                  fp_mixed_precision;
1396     DimensionRoundingType round_type;
1397 };
1398 
/** ROI Pooling Layer Information class */
class ROIPoolingLayerInfo final
{
public:
    /** Constructor
     *
     * @param[in] pooled_width   Pooled width of the layer.
     * @param[in] pooled_height  Pooled height of the layer.
     * @param[in] spatial_scale  Spatial scale to be applied to the ROI coordinates and dimensions.
     * @param[in] sampling_ratio (Optional) Number of samples to include in each pooling region
     *                           (if set to zero, a ceil(roi_dims/pooling_dims) is used). Defaults to 0.
     */
    ROIPoolingLayerInfo(unsigned int pooled_width, unsigned int pooled_height, float spatial_scale, unsigned int sampling_ratio = 0)
        : _pooled_width{ pooled_width },
          _pooled_height{ pooled_height },
          _spatial_scale{ spatial_scale },
          _sampling_ratio{ sampling_ratio }
    {
    }
    /** Get the pooled width of the layer */
    unsigned int pooled_width() const
    {
        return _pooled_width;
    }
    /** Get the pooled height of the layer */
    unsigned int pooled_height() const
    {
        return _pooled_height;
    }
    /** Get the spatial scale */
    float spatial_scale() const
    {
        return _spatial_scale;
    }
    /** Get sampling ratio */
    unsigned int sampling_ratio() const
    {
        return _sampling_ratio;
    }

private:
    unsigned int _pooled_width;
    unsigned int _pooled_height;
    float        _spatial_scale;
    unsigned int _sampling_ratio;
};
1441 
/** Generate Proposals Information class */
class GenerateProposalsInfo
{
public:
    /** Constructor
     *
     * @note The width is passed before the height, whereas the members are stored height first.
     *
     * @param[in] im_width       Width of the original image
     * @param[in] im_height      Height of the original image
     * @param[in] im_scale       Scale applied to the original image
     * @param[in] spatial_scale  (Optional)Scale applied to the feature map. Defaults to 1.0
     * @param[in] pre_nms_topN   (Optional)Number of the best scores to be selected from the transformations. Defaults to 6000.
     * @param[in] post_nms_topN  (Optional)Number of the best scores to be selected from the NMS operation. Defaults to 300.
     * @param[in] nms_thres      (Optional)NMS overlap threshold. Defaults to 0.7.
     * @param[in] min_size       (Optional)Size used to validate the anchors produced. Defaults to 16.
     * @param[in] values_per_roi (Optional)Values used to represent a ROI(Region of interest). Defaults to 4.
     */
    GenerateProposalsInfo(float im_width, float im_height, float im_scale, float spatial_scale = 1.0, int pre_nms_topN = 6000, int post_nms_topN = 300, float nms_thres = 0.7, float min_size = 16.0,
                          size_t values_per_roi = 4)
        : _im_height{ im_height },
          _im_width{ im_width },
          _im_scale{ im_scale },
          _spatial_scale{ spatial_scale },
          _pre_nms_topN{ pre_nms_topN },
          _post_nms_topN{ post_nms_topN },
          _nms_thres{ nms_thres },
          _min_size{ min_size },
          _values_per_roi{ values_per_roi }
    {
    }

    /** Get the original height */
    float im_height() const
    {
        return _im_height;
    }
    /** Get the original width */
    float im_width() const
    {
        return _im_width;
    }
    /** Get the image scale */
    float im_scale() const
    {
        return _im_scale;
    }
    /** Get the value of how many best scores to select (before NMS) */
    int pre_nms_topN() const
    {
        return _pre_nms_topN;
    }
    /** Get the value of how many best scores to select (after NMS) */
    int post_nms_topN() const
    {
        return _post_nms_topN;
    }
    /** Get the NMS overlap threshold */
    float nms_thres() const
    {
        return _nms_thres;
    }
    /** Get the minimal size */
    float min_size() const
    {
        return _min_size;
    }
    /** Get the spatial scale to be applied to the feature maps */
    float spatial_scale() const
    {
        return _spatial_scale;
    }
    /** Get the values used to represent a ROI(Region of interest) */
    size_t values_per_roi() const
    {
        return _values_per_roi;
    }

private:
    float  _im_height;
    float  _im_width;
    float  _im_scale;
    float  _spatial_scale;
    int    _pre_nms_topN;
    int    _post_nms_topN;
    float  _nms_thres;
    float  _min_size;
    size_t _values_per_roi;
};
1522 
/** ComputeAnchors information class */
class ComputeAnchorsInfo
{
public:
    /** Constructor
     *
     * @note The width is passed before the height, whereas the members are stored height first.
     *
     * @param[in] feat_width     Feature map width
     * @param[in] feat_height    Feature map height
     * @param[in] spatial_scale  Feature map scale
     * @param[in] values_per_roi (Optional)Values used to represent a ROI(Region Of Interest). Defaults to 4
     */
    ComputeAnchorsInfo(float feat_width, float feat_height, float spatial_scale, size_t values_per_roi = 4)
        : _feat_height{ feat_height }, _feat_width{ feat_width }, _spatial_scale{ spatial_scale }, _values_per_roi{ values_per_roi }
    {
    }

    /** Get the height of the feature map */
    float feat_height() const
    {
        return _feat_height;
    }

    /** Get the width of the feature map */
    float feat_width() const
    {
        return _feat_width;
    }

    /** Get the scale of the feature map */
    float spatial_scale() const
    {
        return _spatial_scale;
    }

    /** Get the values used to represent a ROI(Region Of Interest) */
    size_t values_per_roi() const
    {
        return _values_per_roi;
    }

private:
    float  _feat_height;
    float  _feat_width;
    float  _spatial_scale;
    size_t _values_per_roi;
};
1572 
/** Bounding Box Transform information class */
class BoundingBoxTransformInfo final
{
public:
    /** Constructor
     *
     * @param[in] img_width                Width of the original image
     * @param[in] img_height               Height of the original image
     * @param[in] scale                    Scale of the original image
     * @param[in] apply_scale              (Optional)Re-apply scaling after transforming the boxes. Defaults to false
     * @param[in] weights                  (Optional)Weights [wx, wy, ww, wh] for the deltas. Defaults to all ones
     * @param[in] correct_transform_coords (Optional)Correct bounding box transform coordinates. Defaults to false
     * @param[in] bbox_xform_clip          (Optional)Minimum bounding box width and height after bounding box transformation in log-space. Defaults to log(1000/16)
     */
    BoundingBoxTransformInfo(float                      img_width,
                             float                      img_height,
                             float                      scale,
                             bool                       apply_scale              = false,
                             const std::array<float, 4> weights                  = { { 1.f, 1.f, 1.f, 1.f } },
                             bool                       correct_transform_coords = false,
                             float                      bbox_xform_clip          = 4.135166556742356f)
        : _img_width(img_width),
          _img_height(img_height),
          _scale(scale),
          _apply_scale(apply_scale),
          _correct_transform_coords(correct_transform_coords),
          _weights(weights),
          _bbox_xform_clip(bbox_xform_clip)
    {
    }

    /** Get the weights [wx, wy, ww, wh] for the deltas */
    std::array<float, 4> weights() const
    {
        return _weights;
    }

    /** Get the bounding box transform clip value */
    float bbox_xform_clip() const
    {
        return _bbox_xform_clip;
    }

    /** Get the height of the original image */
    float img_height() const
    {
        return _img_height;
    }

    /** Get the width of the original image */
    float img_width() const
    {
        return _img_width;
    }

    /** Get the scale of the original image */
    float scale() const
    {
        return _scale;
    }

    /** Get whether scaling is re-applied after transforming the boxes */
    bool apply_scale() const
    {
        return _apply_scale;
    }

    /** Get whether the bounding box transform coordinates are corrected */
    bool correct_transform_coords() const
    {
        return _correct_transform_coords;
    }

private:
    float                _img_width;
    float                _img_height;
    float                _scale;
    bool                 _apply_scale;
    bool                 _correct_transform_coords;
    std::array<float, 4> _weights;
    float                _bbox_xform_clip;
};
1639 
1640 /** Activation Layer Information class */
1641 class ActivationLayerInfo
1642 {
1643 public:
1644     /** Available activation functions */
1645     enum class ActivationFunction
1646     {
1647         LOGISTIC,        /**< Logistic ( \f$ f(x) = \frac{1}{1 + e^{-x}} \f$ ) */
1648         TANH,            /**< Hyperbolic tangent ( \f$ f(x) = a \cdot tanh(b \cdot x) \f$ ) */
1649         RELU,            /**< Rectifier ( \f$ f(x) = max(0,x) \f$ ) */
1650         BOUNDED_RELU,    /**< Upper Bounded Rectifier ( \f$ f(x) = min(a, max(0,x)) \f$ ) */
1651         LU_BOUNDED_RELU, /**< Lower and Upper Bounded Rectifier ( \f$ f(x) = min(a, max(b,x)) \f$ ) */
1652         LEAKY_RELU,      /**< Leaky Rectifier ( \f$ f(x) = \begin{cases}  \alpha x & \quad \text{if } x \text{ < 0}\\  x & \quad \text{if } x \geq \text{ 0 } \end{cases} \f$ ) */
1653         SOFT_RELU,       /**< Soft Rectifier ( \f$ f(x)= log(1+e^x) \f$ ) */
1654         ELU,             /**< Exponential Linear Unit ( \f$ f(x) = \begin{cases}  \alpha (exp(x) - 1) & \quad \text{if } x \text{ < 0}\\  x & \quad \text{if } x \geq \text{ 0 } \end{cases} \f$ ) */
1655         ABS,             /**< Absolute ( \f$ f(x)= |x| \f$ ) */
1656         SQUARE,          /**< Square ( \f$ f(x)= x^2 \f$ )*/
1657         SQRT,            /**< Square root ( \f$ f(x) = \sqrt{x} \f$ )*/
1658         LINEAR,          /**< Linear ( \f$ f(x)= ax + b \f$ ) */
1659         IDENTITY,        /**< Identity ( \f$ f(x)= x \f$ ) */
1660         HARD_SWISH,      /**< Hard-swish ( \f$ f(x) = (x \text{ReLU6}(x+3))/6 = x \min(\max(0,x+3),6)/6 \f$ ) */
1661         SWISH,           /**< Swish ( \f$ f(x) = \frac{x}{1 + e^{-ax}} = x \text{logistic}(ax) \f$ ) */
1662         GELU             /**< GELU ( \f$ f(x) = x * 1/2 * 1 + erf(x / \sqrt{2}) \f$ ) */
1663     };
1664 
1665     /** Lookup table  */
1666     using LookupTable256 = std::array<qasymm8_t, 256>;
1667 
1668     ActivationLayerInfo() = default;
1669     /** Default Constructor
1670      *
1671      * @param[in] f The activation function to use.
1672      * @param[in] a (Optional) The alpha parameter used by some activation functions
1673      *              (@ref ActivationFunction::BOUNDED_RELU, @ref ActivationFunction::LU_BOUNDED_RELU, @ref ActivationFunction::LINEAR, @ref ActivationFunction::TANH).
1674      * @param[in] b (Optional) The beta parameter used by some activation functions (@ref ActivationFunction::LINEAR, @ref ActivationFunction::LU_BOUNDED_RELU, @ref ActivationFunction::TANH).
1675      */
1676     ActivationLayerInfo(ActivationFunction f, float a = 0.0f, float b = 0.0f)
_act(f)1677         : _act(f), _a(a), _b(b), _enabled(true)
1678     {
1679     }
1680     /** Get the type of activation function */
activation()1681     ActivationFunction activation() const
1682     {
1683         return _act;
1684     }
1685     /** Get the alpha value */
a()1686     float a() const
1687     {
1688         return _a;
1689     }
1690     /** Get the beta value */
b()1691     float b() const
1692     {
1693         return _b;
1694     }
1695     /** Check if initialised */
enabled()1696     bool enabled() const
1697     {
1698         return _enabled;
1699     }
1700 
1701 #ifdef __aarch64__
lut()1702     const LookupTable256 &lut() const
1703     {
1704         return _lut;
1705     }
1706 
init_lut(DataType data_type,const UniformQuantizationInfo & qi_in,const UniformQuantizationInfo & qi_out)1707     void init_lut(DataType data_type, const UniformQuantizationInfo &qi_in, const UniformQuantizationInfo &qi_out)
1708     {
1709         if(_act == ActivationFunction::HARD_SWISH)
1710         {
1711             if(data_type == DataType::QASYMM8)
1712             {
1713                 qasymm8_hard_swish_populate_table(_lut, qi_in, qi_out);
1714             }
1715             else
1716             {
1717                 qasymm8_signed_hard_swish_populate_table(_lut, qi_in, qi_out);
1718             }
1719         }
1720         else if(_act == ActivationFunction::LEAKY_RELU)
1721         {
1722             qasymm8_leaky_relu_populate_table(_lut, qi_in, qi_out, _a);
1723         }
1724         else if(_act == ActivationFunction::LOGISTIC)
1725         {
1726             if(data_type == DataType::QASYMM8)
1727             {
1728                 qasymm8_logistic_populate_table(_lut, qi_in, qi_out);
1729             }
1730             else
1731             {
1732                 qasymm8_signed_logistic_populate_table(_lut, qi_in, qi_out);
1733             }
1734         }
1735     }
1736 #endif // __aarch64__
1737 
is_lut_supported(ActivationFunction act_func,DataType data_type)1738     static inline bool is_lut_supported(ActivationFunction act_func, DataType data_type)
1739     {
1740 #ifdef __aarch64__
1741         switch(act_func)
1742         {
1743             case ActivationFunction::HARD_SWISH:
1744                 return data_type == DataType::QASYMM8 || data_type == DataType::QASYMM8_SIGNED;
1745             case ActivationFunction::LEAKY_RELU:
1746                 return data_type == DataType::QASYMM8;
1747             case ActivationFunction::LOGISTIC:
1748                 return data_type == DataType::QASYMM8 || data_type == DataType::QASYMM8_SIGNED;
1749             default:
1750                 return false;
1751         }
1752 #else  // __aarch64__
1753         ARM_COMPUTE_UNUSED(act_func);
1754         ARM_COMPUTE_UNUSED(data_type);
1755         return false;
1756 #endif // __aarch64__
1757     }
1758 
1759 private:
1760     ActivationFunction _act     = { ActivationLayerInfo::ActivationFunction::IDENTITY };
1761     float              _a       = {};
1762     float              _b       = {};
1763     bool               _enabled = { false };
1764 
1765 #ifdef __aarch64__
1766     LookupTable256 _lut = {};
1767 
qasymm8_hard_swish_populate_table(LookupTable256 & lut,const UniformQuantizationInfo & qi_in,const UniformQuantizationInfo & qi_out)1768     static inline void qasymm8_hard_swish_populate_table(LookupTable256 &lut, const UniformQuantizationInfo &qi_in, const UniformQuantizationInfo &qi_out)
1769     {
1770         for(size_t i = 0; i < lut.size(); ++i)
1771         {
1772             lut[i] = qasymm8_hard_swish(i, qi_in, qi_out);
1773         }
1774     }
1775 
qasymm8_signed_hard_swish_populate_table(LookupTable256 & lut,const UniformQuantizationInfo & qi_in,const UniformQuantizationInfo & qi_out)1776     static inline void qasymm8_signed_hard_swish_populate_table(LookupTable256 &lut, const UniformQuantizationInfo &qi_in, const UniformQuantizationInfo &qi_out)
1777     {
1778         for(size_t i = 0; i < lut.size(); ++i)
1779         {
1780             lut[i] = qasymm8_signed_hard_swish(i, qi_in, qi_out);
1781         }
1782     }
1783 
qasymm8_leaky_relu_populate_table(LookupTable256 & lut,const UniformQuantizationInfo & qi_in,const UniformQuantizationInfo & qi_out,float alpha)1784     static inline void qasymm8_leaky_relu_populate_table(LookupTable256 &lut, const UniformQuantizationInfo &qi_in, const UniformQuantizationInfo &qi_out, float alpha)
1785     {
1786         for(size_t i = 0; i < lut.size(); ++i)
1787         {
1788             lut[i] = qasymm8_leaky_relu(i, qi_in, qi_out, alpha);
1789         }
1790     }
1791 
qasymm8_logistic_populate_table(LookupTable256 & lut,const UniformQuantizationInfo & qi_in,const UniformQuantizationInfo & qi_out)1792     static inline void qasymm8_logistic_populate_table(LookupTable256 &lut, const UniformQuantizationInfo &qi_in, const UniformQuantizationInfo &qi_out)
1793     {
1794         for(size_t i = 0; i < lut.size(); ++i)
1795         {
1796             lut[i] = qasymm8_logistic(i, qi_in, qi_out);
1797         }
1798     }
1799 
qasymm8_signed_logistic_populate_table(LookupTable256 & lut,const UniformQuantizationInfo & qi_in,const UniformQuantizationInfo & qi_out)1800     static inline void qasymm8_signed_logistic_populate_table(LookupTable256 &lut, const UniformQuantizationInfo &qi_in, const UniformQuantizationInfo &qi_out)
1801     {
1802         for(size_t i = 0; i < lut.size(); ++i)
1803         {
1804             lut[i] = qasymm8_signed_logistic(static_cast<int8_t>(i), qi_in, qi_out);
1805         }
1806     }
1807 #endif // __aarch64__
1808 };
1809 
1810 /** Fully connected layer info */
1811 struct FullyConnectedLayerInfo
1812 {
1813     /* Fused-activation parameters */
1814     ActivationLayerInfo activation_info{}; /**<  Fused activation to apply after the matrix multiplication. */
1815     /* Information about weights */
1816     DataLayout weights_trained_layout{ DataLayout::NCHW }; /**<  Layout that the weights have been trained with. */
1817     bool       transpose_weights{ true };                  /**<  Transpose weights if true. */
1818     bool       are_weights_reshaped{ false };              /**<  Reshape the weights tensor if false. */
1819     bool       retain_internal_weights{ false };           /**<  Retain internal reshaped weights. */
1820     bool       enable_fast_math{ false };                  /**<  Enable fast math computation. */
1821     /* Other parameters */
1822     bool fp_mixed_precision{ false }; /**<  Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy. */
1823 
1824     /** Sets the weights trained data layout
1825      *
1826      * @param[in] layout Data layout that the weights were trained with
1827      *
1828      * @return Updated object
1829      */
set_weights_trained_layoutFullyConnectedLayerInfo1830     FullyConnectedLayerInfo &set_weights_trained_layout(DataLayout layout)
1831     {
1832         weights_trained_layout = layout;
1833         return *this;
1834     }
1835     /** Sets the transpose weights flag
1836      *
1837      * @param[in] should_transpose_weights Boolean flag indicating if weights should be transposed
1838      *
1839      * @return Updated object
1840      */
set_transpose_weightsFullyConnectedLayerInfo1841     FullyConnectedLayerInfo &set_transpose_weights(bool should_transpose_weights)
1842     {
1843         transpose_weights = should_transpose_weights;
1844         return *this;
1845     }
1846 };
1847 
1848 /** Normalization Layer Information class */
1849 class NormalizationLayerInfo
1850 {
1851 public:
1852     /** Default Constructor
1853      *
1854      * @param[in] type      The normalization type. Can be @ref NormType::IN_MAP_1D, @ref NormType::IN_MAP_2D or @ref NormType::CROSS_MAP
1855      * @param[in] norm_size The normalization size is the number of elements to normalize across. Defaults to 5.
1856      * @param[in] alpha     (Optional) Alpha parameter used by normalization equation. Defaults to 0.0001.
1857      * @param[in] beta      (Optional) Beta parameter used by normalization equation. Defaults to 0.5.
1858      * @param[in] kappa     (Optional) Kappa parameter used by [Krichevksy 2012] Across Channel Local Brightness Normalization equation.
1859      * @param[in] is_scaled (Optional) Boolean that specifies if alpha will be scaled by the normalization size or not.
1860      *                      Should be false to follow [Krichevksy 2012].
1861      */
1862     NormalizationLayerInfo(NormType type, uint32_t norm_size = 5, float alpha = 0.0001f, float beta = 0.5f, float kappa = 1.f, bool is_scaled = true)
_type(type)1863         : _type(type), _norm_size(norm_size), _alpha(alpha), _beta(beta), _kappa(kappa), _is_scaled(is_scaled)
1864     {
1865     }
1866     /** Get the normalization type */
type()1867     NormType type() const
1868     {
1869         return _type;
1870     }
1871     /** Get the normalization size */
norm_size()1872     uint32_t norm_size() const
1873     {
1874         return _norm_size;
1875     }
1876     /** Get the alpha value */
alpha()1877     float alpha() const
1878     {
1879         return _alpha;
1880     }
1881     /** Get the beta value */
beta()1882     float beta() const
1883     {
1884         return _beta;
1885     }
1886     /** Get the kappa value */
kappa()1887     float kappa() const
1888     {
1889         return _kappa;
1890     }
1891     /** Get the is_scaled value */
is_scaled()1892     bool is_scaled() const
1893     {
1894         return _is_scaled;
1895     }
1896     /** Check if normalization is cross map */
is_cross_map()1897     bool is_cross_map() const
1898     {
1899         return _type == NormType::CROSS_MAP;
1900     }
1901     /** Check if normalization is not cross map */
is_in_map()1902     bool is_in_map() const
1903     {
1904         return !is_cross_map();
1905     }
1906     /** Return the scaling factor of the normalization function.
1907      *
1908      * If is_scaled is set to false then [Krichevksy 2012] normalization scaling is performed,
1909      * where alpha is returned plainly, else alpha is scaled by the total number of elements used for the normalization.
1910      *
1911      * @return The normalization scaling factor.
1912      */
scale_coeff()1913     float scale_coeff() const
1914     {
1915         const uint32_t size = (_type == NormType::IN_MAP_2D) ? _norm_size * _norm_size : _norm_size;
1916         return (_is_scaled) ? (_alpha / size) : _alpha;
1917     }
1918 
1919 private:
1920     NormType _type;
1921     uint32_t _norm_size;
1922     float    _alpha;
1923     float    _beta;
1924     float    _kappa;
1925     bool     _is_scaled;
1926 };
1927 
/** Strided Slice Layer Information class: holds the three bit masks passed at construction */
class StridedSliceLayerInfo
{
public:
    /** Default Constructor
     *
     * @param[in] begin_mask       (Optional) If the ith bit of begin_mask is set, starts[i] is ignored and the fullest possible range in that dimension is used instead.
     * @param[in] end_mask         (Optional) If the ith bit of end_mask is set, ends[i] is ignored and the fullest possible range in that dimension is used instead.
     * @param[in] shrink_axis_mask (Optional) If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1.
     */
    StridedSliceLayerInfo(int32_t begin_mask = 0, int32_t end_mask = 0, int32_t shrink_axis_mask = 0)
        : _begin_mask(begin_mask), _end_mask(end_mask), _shrink_axis_mask(shrink_axis_mask)
    {
    }

    /** Get the begin mask value
     *
     * @return The begin mask given at construction
     */
    int32_t begin_mask() const
    {
        return _begin_mask;
    }

    /** Get the end mask value
     *
     * @return The end mask given at construction
     */
    int32_t end_mask() const
    {
        return _end_mask;
    }

    /** Get the shrink axis mask value
     *
     * @return The shrink axis mask given at construction
     */
    int32_t shrink_axis_mask() const
    {
        return _shrink_axis_mask;
    }

private:
    int32_t _begin_mask;
    int32_t _end_mask;
    int32_t _shrink_axis_mask;
};
1965 
/** Memory layouts for the weights tensor.
 *
 * * UNSPECIFIED is used to select kernels that do not run in
 *   variable weights mode.
 *
 * * ANY is used to query the kernel database to retrieve any of the
 *   kernels that run in variable weights mode. Once a kernel is
 *   found, the specific format expected by the kernel can be
 *   retrieved by the user for reordering the weights tensor
 *   accordingly.
 *
 * The other values OHWIo{interleave_by}i{block_by} describe the
 * memory layout of a 4D tensor with layout OHWI that has been
 * transformed into a 4D tensor with dimensions O'HWI' where:
 *
 * O' = first multiple of {interleave_by} s.t. O<=O'
 * I' = first multiple of {block_by} s.t. I<=I'
 *
 * The total size of the dst tensor is O' x H x W x I'
 *
 * The access function of the tensor with layout
 * OHWIo{interleave_by}i{block_by} and size O'HWI' is a 6-parameter
 * access function, where the 6 parameters are computed as follows:
 *
 * x5 = floor(o/{interleave_by}) RANGE [0, O'/{interleave_by} -1] SIZE: O'/{interleave_by}
 *
 * x4 = h                        RANGE [0, H-1]                   SIZE: H
 * x3 = w                        RANGE [0, W-1]                   SIZE: W
 * x2 = floor(i/{block_by})      RANGE [0, I'/{block_by} -1]      SIZE: I'/{block_by}
 * x1 = o%{interleave_by}        RANGE [0, {interleave_by} -1]    SIZE: {interleave_by}
 * x0 = i%{block_by}             RANGE [0, {block_by} -1]         SIZE: {block_by}
 *                                                          TOTAL SIZE: O' * H * W * I'
 *
 *        4D                       6D
 * -----------------   -----------------------------------
 * value(o, h, w, i) =   x5 * H * W * I' * {interleave_by}
 *                     + x4 * W * I' * {interleave_by}
 *                     + x3 * I' * {interleave_by}
 *                     + x2 * {interleave_by} * {block_by}
 *                     + x1 * {block_by}
 *                     + x0
 *
 * Notice that in arm_gemm the 4D tensor of dimension O'HWI' created
 * for the OHWIo{interleave_by}i{block_by} format is in reality seen
 * as a 2D tensor, where the number of rows is O'/{interleave_by}
 * and the number of columns is {interleave_by} * H * W * I'.
 *
 * The postfix *_bf16 is for the memory layout needed for the
 * fast-mode kernels, in which the weights are passed in bfloat16
 * format.
 *
 * The enumerator values encode the layout parameters:
 * bits [20, 23] hold {block_by}, bits [8, 19] hold {interleave_by}
 * and bit 4 is set for the *_bf16 variants.
 */
enum class WeightFormat
{
    UNSPECIFIED    = 0x1,
    ANY            = 0x2,
    OHWI           = 0x100100,
    OHWIo2         = 0x100200,
    OHWIo4         = 0x100400,
    OHWIo8         = 0x100800,
    OHWIo16        = 0x101000,
    OHWIo32        = 0x102000,
    OHWIo64        = 0x104000,
    OHWIo128       = 0x108000,
    OHWIo4i2       = 0x200400,
    OHWIo4i2_bf16  = 0x200410,
    OHWIo8i2       = 0x200800,
    OHWIo8i2_bf16  = 0x200810,
    OHWIo16i2      = 0x201000,
    OHWIo16i2_bf16 = 0x201010,
    OHWIo32i2      = 0x202000,
    OHWIo32i2_bf16 = 0x202010,
    OHWIo64i2      = 0x204000,
    OHWIo64i2_bf16 = 0x204010,
    OHWIo4i4       = 0x400400,
    OHWIo4i4_bf16  = 0x400410,
    OHWIo8i4       = 0x400800,
    OHWIo8i4_bf16  = 0x400810,
    OHWIo16i4      = 0x401000,
    OHWIo16i4_bf16 = 0x401010,
    OHWIo32i4      = 0x402000,
    OHWIo32i4_bf16 = 0x402010,
    OHWIo64i4      = 0x404000,
    OHWIo64i4_bf16 = 0x404010,
    OHWIo2i8       = 0x800200,
    OHWIo4i8       = 0x800400,
    OHWIo8i8       = 0x800800,
    OHWIo16i8      = 0x801000,
    OHWIo32i8      = 0x802000,
    OHWIo64i8      = 0x804000
};
2056 // OHWIo<interleave_by>i<block_by>
interleave_by(const WeightFormat wf)2057 inline int interleave_by(const WeightFormat wf)
2058 {
2059     return (static_cast<int>(wf) >> 8) & 0xFFF;
2060 }
/** Extract the block factor of a weight format named OHWIo<interleave_by>i<block_by>
 *
 * @param[in] wf Weight format to inspect
 *
 * @return Bits [20, 23] of the enumerator value, i.e. <block_by> (0 for UNSPECIFIED and ANY)
 */
inline int block_by(const WeightFormat wf)
{
    return (static_cast<int>(wf) >> 20) & 0xF;
}
is_fixed_format(const WeightFormat & wf)2065 inline bool is_fixed_format(const WeightFormat &wf)
2066 {
2067     return wf != WeightFormat::UNSPECIFIED && wf != WeightFormat::ANY;
2068 }
/** Check whether a weight format is one of the fast-math (*_bf16) layouts
 *
 * @param[in] wf Weight format to inspect
 *
 * @return True if bit 4 of the enumerator value is set
 */
inline bool is_fixed_format_fast_math(const WeightFormat &wf)
{
    // Compare against zero explicitly rather than relying on the implicit int-to-bool conversion.
    return ((static_cast<int>(wf) >> 4) & 0x1) != 0;
}
2073 
2074 /** Convolution Layer Weights Information class. This class stores the necessary information to compute convolution layer when the weights are already reshaped */
2075 class WeightsInfo
2076 {
2077 public:
2078     /** Default constructor */
WeightsInfo()2079     WeightsInfo()
2080         : _are_reshaped(false), _kernel_width(0), _kernel_height(0), _num_kernels(0), _retain_internal_weights(false), _weight_format(arm_compute::WeightFormat::UNSPECIFIED)
2081     {
2082     }
2083     /** Constructor
2084      *
2085      * @param[in] are_reshaped            True if the weights have been reshaped
2086      * @param[in] kernel_width            Kernel width.
2087      * @param[in] kernel_height           Kernel height.
2088      * @param[in] num_kernels             Number of convolution kernels.
2089      * @param[in] retain_internal_weights (Optional) True if internal reshaped weights must be retained. Used for reconfiguration purposes. Default is false.
2090      * @param[in] weight_format           (Optional) arm_gemm:WeightFormat enumeration requested by the user. Default is arm_compute::WeightFormat::UNSPECIFIED.
2091      */
2092     WeightsInfo(bool are_reshaped, unsigned int kernel_width, unsigned int kernel_height, unsigned int num_kernels, bool retain_internal_weights = false,
2093                 arm_compute::WeightFormat weight_format = arm_compute::WeightFormat::UNSPECIFIED)
_are_reshaped(are_reshaped)2094         : _are_reshaped(are_reshaped), _kernel_width(kernel_width), _kernel_height(kernel_height), _num_kernels(num_kernels), _retain_internal_weights(retain_internal_weights), _weight_format(weight_format)
2095     {
2096     }
2097     /** Flag which specifies if the weights tensor has been reshaped.
2098      *
2099      * @return True if the weights tensors has been reshaped
2100      */
are_reshaped()2101     bool are_reshaped() const
2102     {
2103         return _are_reshaped;
2104     };
2105     /** Return the number of convolution kernels
2106      *
2107      * @return The number of convolution kernels
2108      */
num_kernels()2109     unsigned int num_kernels() const
2110     {
2111         return _num_kernels;
2112     };
2113     /** Return the width and height of the kernel
2114      *
2115      * @return The width and height of the kernel
2116      */
kernel_size()2117     std::pair<unsigned int, unsigned int> kernel_size() const
2118     {
2119         return std::make_pair(_kernel_width, _kernel_height);
2120     }
retain_internal_weights()2121     bool retain_internal_weights() const
2122     {
2123         return _retain_internal_weights;
2124     }
weight_format()2125     arm_compute::WeightFormat weight_format() const
2126     {
2127         return _weight_format;
2128     }
set_weight_format(arm_compute::WeightFormat weight_format)2129     void set_weight_format(arm_compute::WeightFormat weight_format)
2130     {
2131         _weight_format = weight_format;
2132     }
2133 
kernel_width()2134     unsigned int kernel_width() const
2135     {
2136         return _kernel_width;
2137     }
kernel_height()2138     unsigned int kernel_height() const
2139     {
2140         return _kernel_height;
2141     }
2142 
2143 private:
2144     bool                      _are_reshaped;
2145     unsigned int              _kernel_width;
2146     unsigned int              _kernel_height;
2147     unsigned int              _num_kernels;
2148     bool                      _retain_internal_weights;
2149     arm_compute::WeightFormat _weight_format;
2150 };
2151 
/** GEMM reshape information class. This class stores the necessary information about matrix A and matrix B reshape.
 *
 * The matrix A can only be reshaped through @ref opencl::kernels::ClGemmReshapeLhsMatrixKernel or  @ref cpu::kernels::CpuGemmInterleave4x4Kernel
 * Note: Optionally just for @ref opencl::kernels::ClGemmReshapeLhsMatrixKernel is it possible to set mult_interleave4x4_height, the multiplication factor for the height of the 4x4 interleaved block
 *
 * The matrix B can only be reshaped through @ref opencl::kernels::ClGemmReshapeRhsMatrixKernel or  @ref cpu::kernels::CpuGemmTranspose1xWKernel
 * Note: Optionally just for @ref opencl::kernels::ClGemmReshapeRhsMatrixKernel is it possible to set mult_transpose1xW_width, the multiplication factor for the width of the 1xW transposed block
 *
 */
class GEMMReshapeInfo final
{
public:
    /** Default constructor
     *
     * Sets m, n, k and both multiplication factors to 1, the GEMM3D output depth to 0 and both flags to false.
     */
    GEMMReshapeInfo()
        : _m(1), _n(1), _k(1), _mult_transpose1xW_width(1), _mult_interleave4x4_height(1), _depth_output_gemm3d(0), _reinterpret_input_as_3d(false), _broadcast_bias(false)
    {
    }
    /** Constructor
     *
     * @param[in] m                         Number of matrix A rows
     * @param[in] n                         Number of matrix B columns
     * @param[in] k                         Number of matrix A columns or matrix B rows
     * @param[in] mult_transpose1xW_width   (Optional) Multiplication factor for the width of the 1xW transposed block
     * @param[in] mult_interleave4x4_height (Optional) Multiplication factor for the height of the 4x4 interleaved block
     * @param[in] depth_output_gemm3d       (Optional) Depth (third dimension) of the output tensor to be used with the GEMM3D kernel.
     *                                      If 0 the output will not be reinterpreted as 3D. Default 0
     * @param[in] reinterpret_input_as_3d   (Optional) Reinterpret the input as 3D tensor. (i.e. this flag should be set to true when GEMM is used
     *                                      to perform 1x1 convolutions with the NHWC data layout)
     * @param[in] broadcast_bias            (Optional) Broadcast the shape of the bias tensor from a vector to a matrix.
     */
    GEMMReshapeInfo(int m, int n, int k, int mult_transpose1xW_width = 1, int mult_interleave4x4_height = 1, int depth_output_gemm3d = 0, bool reinterpret_input_as_3d = false, bool broadcast_bias = false)
        : _m(m),
          _n(n),
          _k(k),
          _mult_transpose1xW_width(mult_transpose1xW_width),
          _mult_interleave4x4_height(mult_interleave4x4_height),
          _depth_output_gemm3d(depth_output_gemm3d),
          _reinterpret_input_as_3d(reinterpret_input_as_3d),
          _broadcast_bias(broadcast_bias)
    {
    }
    /** Number of matrix A rows
     *
     * @return the number of matrix A rows
     */
    int m() const
    {
        return _m;
    }
    /** Number of matrix B columns
     *
     * @return the number of matrix B columns
     */
    int n() const
    {
        return _n;
    }
    /** Number of matrix A columns or matrix B rows
     *
     * @return the number of matrix A columns or matrix B rows
     */
    int k() const
    {
        return _k;
    }
    /** Multiplication factor for the width of the 1xW transposed block
     *
     * @return the multiplication factor for the width of the 1xW transposed block
     */
    int mult_transpose1xW_width() const
    {
        return _mult_transpose1xW_width;
    }
    /** Multiplication factor for the height of the 4x4 interleaved block
     *
     * @return the multiplication factor for the height of the 4x4 interleaved block
     */
    int mult_interleave4x4_height() const
    {
        return _mult_interleave4x4_height;
    }
    /** Depth (third dimension) of the output tensor to be used with the GEMM3D kernel
     *
     * @note GEMM3D kernel is used when the output has to be reinterpret as 3D tensor. In that case:
     *       m = depth_output_gemm3d * output_height
     *
     * @return the depth of the output tensor to be used with the GEMM3D kernel
     */
    int depth_output_gemm3d() const
    {
        return _depth_output_gemm3d;
    }
    /** Flag which specifies if the input tensor has to be reinterpreted as 3D
     *
     * @return True if the input tensor has to be reinterpreted as 3D tensor
     */
    bool reinterpret_input_as_3d() const
    {
        return _reinterpret_input_as_3d;
    }
    /** Flag which specifies whether to broadcast the shape of the bias tensor.
     *
     * @return True if the shape of the bias tensor is to be broadcasted.
     */
    bool broadcast_bias() const
    {
        return _broadcast_bias;
    }

private:
    int  _m;
    int  _n;
    int  _k;
    int  _mult_transpose1xW_width;
    int  _mult_interleave4x4_height;
    int  _depth_output_gemm3d;
    bool _reinterpret_input_as_3d;
    bool _broadcast_bias;
};
2265 
2266 struct ConvolutionInfo
2267 {
2268     ConvolutionInfo() = default;
ConvolutionInfoConvolutionInfo2269     ConvolutionInfo(const PadStrideInfo &pad_stride_info, unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
2270         : pad_stride_info(pad_stride_info), depth_multiplier(depth_multiplier), act_info(act_info), dilation(dilation)
2271     {
2272     }
2273     PadStrideInfo       pad_stride_info{};        /**< Convolution info (Pads, strides,...) */
2274     unsigned int        depth_multiplier{ 1 };    /**< Multiplier to apply to input's depth to retrieve the output depth. Defaults to 1 */
2275     ActivationLayerInfo act_info{};               /**< Fused activation to apply after convolution. */
2276     Size2D              dilation{ Size2D(1, 1) }; /**< Dilation, in elements, across x and y. Defaults to (1, 1). */
2277 };
2278 
/** GEMMLowp output stage type */
enum class GEMMLowpOutputStageType
{
    NONE,                     /**< No quantization */
    QUANTIZE_DOWN,            /**< Quantize using an integer multiplication */
    QUANTIZE_DOWN_FIXEDPOINT, /**< Quantize using a fixed point multiplication */
    QUANTIZE_DOWN_FLOAT       /**< Quantize using a floating point multiplication */
};
2287 
2288 /** GEMMLowp output stage info */
2289 struct GEMMLowpOutputStageInfo
2290 {
2291     GEMMLowpOutputStageType type{ GEMMLowpOutputStageType::NONE };                        /**< GEMMLowp output stage type */
2292     int32_t                 gemmlowp_offset{ 0 };                                         /**< GEMMLowp output stage offset used for quantizing to QASYMM8 */
2293     int32_t                 gemmlowp_multiplier{ 0 };                                     /**< GEMMLowp output stage multiplier used for quantizing to QASYMM8 */
2294     int32_t                 gemmlowp_shift{ 0 };                                          /**< GEMMLowp output stage shift used for quantizing to uint8 */
2295     int32_t                 gemmlowp_min_bound{ std::numeric_limits<int32_t>::lowest() }; /**< GEMMLowp min value used to saturate down the output result before converting back to QASYMM8 */
2296     int32_t                 gemmlowp_max_bound{ std::numeric_limits<int32_t>::max() };    /**< GEMMLowp max value used to saturate down the output result before converting back to QASYMM8 */
2297     std::vector<int32_t>    gemmlowp_multipliers{};                                       /**< GEMMLowp output stage multiplier used for quantizing to QASYMM8 */
2298     std::vector<int32_t>    gemmlowp_shifts{};                                            /**< GEMMLowp output stage multiplier used for quantizing to QASYMM8 */
2299     float                   gemmlowp_real_multiplier{ 0 };                                /**< GEMMLowp output stage real multiplier used for quantizing to QASYMM8 */
2300     bool                    is_quantized_per_channel{ false };                            /**< GEMMLowp quantized per-channel flag */
2301     DataType                output_data_type{ DataType::UNKNOWN };                        /**< Output tensor data type to use if the output is not initialized */
2302 };
2303 
/** GEMM LHS (Left Hand Side) matrix information */
struct GEMMLHSMatrixInfo
{
    /** Default constructor: m0, k0 and v0 are 1, transpose and interleave are both true */
    GEMMLHSMatrixInfo() = default;
    /** Constructor
     *
     * @param[in] m     Value stored in m0
     * @param[in] k     Value stored in k0
     * @param[in] v     Value stored in v0
     * @param[in] trans Value stored in transpose
     * @param[in] inter Value stored in interleave
     */
    GEMMLHSMatrixInfo(unsigned int m, unsigned int k, unsigned int v, bool trans, bool inter)
        : m0{ m },
          k0{ k },
          v0{ v },
          transpose{ trans },
          interleave{ inter }
    {
    }

    /** Number of rows processed by the matrix multiplication */
    unsigned int m0{ 1 };
    /** Number of partial accumulations performed by the matrix multiplication */
    unsigned int k0{ 1 };
    /** Number of vertical blocks of size (m0xk0) stored on the same output row */
    unsigned int v0{ 1 };
    /** True if the (m0xk0) block has to be transposed before being stored */
    bool transpose{ true };
    /** True if the v0 (m0xk0) blocks have to be interleaved in the output row */
    bool interleave{ true };
};
2318 
/** GEMM RHS (Right Hand Side) matrix information */
struct GEMMRHSMatrixInfo
{
    /** Default constructor: n0, k0 and h0 are 1, transpose and interleave are true, export_to_cl_image is false */
    GEMMRHSMatrixInfo() = default;
    /** Constructor
     *
     * @param[in] n                Value stored in n0
     * @param[in] k                Value stored in k0
     * @param[in] h                Value stored in h0
     * @param[in] trans            Value stored in transpose
     * @param[in] inter            Value stored in interleave
     * @param[in] export_to_cl_img Value stored in export_to_cl_image
     */
    GEMMRHSMatrixInfo(unsigned int n, unsigned int k, unsigned int h, bool trans, bool inter, bool export_to_cl_img)
        : n0{ n },
          k0{ k },
          h0{ h },
          transpose{ trans },
          interleave{ inter },
          export_to_cl_image{ export_to_cl_img }
    {
    }

    /** Number of columns processed by the matrix multiplication */
    unsigned int n0{ 1 };
    /** Number of partial accumulations performed by the matrix multiplication */
    unsigned int k0{ 1 };
    /** Number of horizontal blocks of size (k0xn0) stored on the same output row */
    unsigned int h0{ 1 };
    /** True if the (k0xn0) block has to be transposed before being stored */
    bool transpose{ true };
    /** True if the h0 (k0xn0) blocks have to be interleaved in the output row */
    bool interleave{ true };
    /** True if the reshaped rhs has to be exported to cl_image. n0 must be equal to 4 */
    bool export_to_cl_image{ false };
};
2334 
2335 class ITensorInfo;
2336 /** GEMM information class. This class stores the necessary information to compute GEMM functions
2337  *
2338  * This object also contains the information about how matrix A and matrix B have been reshaped
2339  *
2340  */
2341 class GEMMInfo
2342 {
2343 public:
2344     /** Default constructor */
GEMMInfo()2345     GEMMInfo() noexcept
2346         : _is_a_reshaped(false),
2347           _is_b_reshaped(false),
2348           _reshape_b_only_on_first_run(true),
2349           _depth_output_gemm3d(0),
2350           _reinterpret_input_as_3d(false),
2351           _retain_internal_weights(false),
2352           _gemmlowp_output_stage(),
2353           _fast_math(false),
2354           _fp_mixed_precision(false),
2355           _broadcast_bias(false),
2356           _pretranspose_A(false),
2357           _pretranspose_B(false),
2358           _activation_info(),
2359           _post_ops(),
2360           _fixed_format(false),
2361           _weight_format(arm_compute::WeightFormat::UNSPECIFIED)
2362     {
2363     }
2364     /** Constructor
2365      *
2366      * @param[in] is_a_reshaped               True if the matrix A has been reshaped
2367      * @param[in] is_b_reshaped               True if the matrix B has been reshaped
2368      * @param[in] reshape_b_only_on_first_run Reshape matrix B only for the first run
2369      * @param[in] depth_output_gemm3d         (Optional) Depth (third dimension) of the output tensor to be used with the GEMM3D kernel
2370      *                                        If 0 the output will not be reinterpreted as 3D. Default 0
2371      * @param[in] reinterpret_input_as_3d     (Optional) Reinterpret the input as 3D tensor. (i.e. this flag should be set to true when GEMM is used
2372      *                                        to perform 1x1 convolutions with the NHWC data layout)
2373      * @param[in] retain_internal_weights     (Optional) Retain the weights tensor from previous run
2374      * @param[in] gemmlowp_output_stage       (Optional) GEMMLowp Output stage info
2375      * @param[in] fp_mixed_precision          (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy.
2376      * @param[in] fast_math                   (Optional) Use a data type of shorter width to improve performance
2377      * @param[in] broadcast_bias              (Optional) Broadcast the shape of the bias tensor from a vector to a matrix.
2378      * @param[in] activation_info             (Optional) Activation to apply after the matrix multiplication
2379      * @param[in] post_ops                    (Optional) A sequence of post operations that are performed after the main operation.
2380      * @param[in] fixed_format                (Optional) Specify the selection of fixed format kernels for variable weights support in GEMM. These kernels expect the weights tensor to be in amemory format that is fixed by the kernel itself. For more information, see arm_compute::WeightFormat.
2381      * @param[in] weight_format               (Optional) arm_gemm:WeightFormat enumeration requested by the user. Default is arm_compute::WeightFormat::UNSPECIFIED.
2382      */
2383     GEMMInfo(bool is_a_reshaped, bool is_b_reshaped, bool reshape_b_only_on_first_run, int depth_output_gemm3d = 0, bool reinterpret_input_as_3d = false, bool retain_internal_weights = false,
2384              GEMMLowpOutputStageInfo gemmlowp_output_stage = GEMMLowpOutputStageInfo(), bool fp_mixed_precision = false, bool fast_math = false, bool broadcast_bias = false,
2385              const ActivationLayerInfo &activation_info = ActivationLayerInfo(), const experimental::PostOpList<ITensorInfo *> &post_ops = experimental::PostOpList<ITensorInfo *>(),
2386              bool fixed_format = false, arm_compute::WeightFormat weight_format = arm_compute::WeightFormat::UNSPECIFIED) noexcept
_is_a_reshaped(is_a_reshaped)2387         : _is_a_reshaped(is_a_reshaped),
2388           _is_b_reshaped(is_b_reshaped),
2389           _reshape_b_only_on_first_run(reshape_b_only_on_first_run),
2390           _depth_output_gemm3d(depth_output_gemm3d),
2391           _reinterpret_input_as_3d(reinterpret_input_as_3d),
2392           _retain_internal_weights(retain_internal_weights),
2393           _gemmlowp_output_stage(gemmlowp_output_stage),
2394           _fast_math(fast_math),
2395           _fp_mixed_precision(fp_mixed_precision),
2396           _broadcast_bias(broadcast_bias),
2397           _pretranspose_A(false),
2398           _pretranspose_B(false),
2399           _activation_info(activation_info),
2400           _post_ops(post_ops),
2401           _fixed_format(fixed_format),
2402           _weight_format(weight_format)
2403     {
2404     }
2405     /** Flag which specifies if the matrix A has been reshaped
2406      *
2407      * @return True if the matrix A has been reshaped
2408      */
is_a_reshaped()2409     bool is_a_reshaped() const
2410     {
2411         return _is_a_reshaped;
2412     };
2413     /** Flag which specifies if the matrix B has been reshaped
2414      *
2415      * @return True if the matrix B has been reshaped
2416      */
is_b_reshaped()2417     bool is_b_reshaped() const
2418     {
2419         return _is_b_reshaped;
2420     };
2421     /** Flag which specifies if the reshape of matrix B should executed only for the first
2422      *
2423      * @note This flag could be set to TRUE when GEMM is used to accelerate convolution layer
2424      *
2425      * @return True if the reshaped of matrix B happens only for the first run
2426      */
reshape_b_only_on_first_run()2427     bool reshape_b_only_on_first_run() const
2428     {
2429         return _reshape_b_only_on_first_run;
2430     };
2431     /** Depth of the output when GEMM output is reinterpreted as 3D tensor
2432      *
2433      * @return the depth of the output tensor
2434      */
depth_output_gemm3d()2435     int depth_output_gemm3d() const
2436     {
2437         return _depth_output_gemm3d;
2438     };
2439     /** Flag which specifies if the input tensor has to be reinterpreted as 3D
2440      *
2441      * @return True if the input tensor has to be reinterpreted as 3D tensor
2442      */
reinterpret_input_as_3d()2443     bool reinterpret_input_as_3d() const
2444     {
2445         return _reinterpret_input_as_3d;
2446     };
2447     /** Flag which specifies if the weights tensor has to be retained from previous run
2448      *
2449      * @return True if the weights tensor has to be retained
2450      */
retain_internal_weights()2451     bool retain_internal_weights() const
2452     {
2453         return _retain_internal_weights;
2454     };
2455     /** GEMMLowp output stage
2456      *
2457      * @return the GEMMLowp output stage info
2458      */
gemmlowp_output_stage()2459     GEMMLowpOutputStageInfo gemmlowp_output_stage() const
2460     {
2461         return _gemmlowp_output_stage;
2462     };
2463     /** Sets GEMMLowp output stage
2464      *
2465      * @param[in] output_stage Output stage to set
2466      */
set_gemmlowp_output_stage(GEMMLowpOutputStageInfo & output_stage)2467     void set_gemmlowp_output_stage(GEMMLowpOutputStageInfo &output_stage)
2468     {
2469         _gemmlowp_output_stage = output_stage;
2470     };
2471     /** Flag which specifies if a wider accumulator should be used.
2472      *
2473      * @return True if a wider accumulator has to be used
2474      */
fp_mixed_precision()2475     bool fp_mixed_precision() const
2476     {
2477         return _fp_mixed_precision;
2478     };
2479     /** Flag which specifies if a shorter accumulator to be used.
2480      *
2481      * @return True if a shorter accumulator has to be used
2482      */
fast_math()2483     bool fast_math() const
2484     {
2485         return _fast_math;
2486     };
2487     /** Set fast math flag
2488      *
2489      * @param[in] fast_math Flag to set
2490      */
set_fast_math(bool fast_math)2491     void set_fast_math(bool fast_math)
2492     {
2493         _fast_math = fast_math;
2494     }
2495     /** Flag which specifies whether to broadcast the shape of the bias tensor.
2496      *
2497      * @return True if the shape of the bias tensor is to be broadcasted.
2498      */
broadcast_bias()2499     bool broadcast_bias() const
2500     {
2501         return _broadcast_bias;
2502     };
2503     /** Flag which specifies whether A should be pre-transposed if supported.
2504      *
2505      * @return True if A should be pre-transposed else false.
2506      */
pretranspose_A()2507     bool pretranspose_A() const
2508     {
2509         return _pretranspose_A;
2510     };
2511     /** Set pre-transpose A flag
2512      *
2513      * @param[in] flag Flag to set
2514      */
set_pretranspose_A(bool flag)2515     void set_pretranspose_A(bool flag)
2516     {
2517         _pretranspose_A = flag;
2518     }
2519     /** Flag which specifies whether b should be pre-transposed if supported.
2520      *
2521      * @return True if b should be pre-transposed else false.
2522      */
pretranspose_B()2523     bool pretranspose_B() const
2524     {
2525         return _pretranspose_B;
2526     };
2527     /** Set pre-transpose b flag
2528      *
2529      * @param[in] flag Flag to set
2530      */
set_pretranspose_B(bool flag)2531     void set_pretranspose_B(bool flag)
2532     {
2533         _pretranspose_B = flag;
2534     }
2535     /** Activation layer to apply after the matrix multiplication
2536      *
2537      * @return ActivationLayerInfo object
2538      */
activation_info()2539     ActivationLayerInfo activation_info() const
2540     {
2541         return _activation_info;
2542     }
2543     /** Set activation layer info
2544      *
2545      * @param[in] activation_info ActivationLayerInfo object to set
2546      */
set_activation_info(const ActivationLayerInfo & activation_info)2547     void set_activation_info(const ActivationLayerInfo &activation_info)
2548     {
2549         _activation_info = activation_info;
2550     }
2551     /** Post operations to apply after the matrix multiplication
2552      *
2553      * @return experimental::PostOpList object
2554      */
post_ops()2555     const experimental::PostOpList<ITensorInfo *> &post_ops() const
2556     {
2557         return _post_ops;
2558     }
2559     /** Set post ops
2560      *
2561      * @param[in] post_ops experimental::PostOpList object to set
2562      */
set_post_ops(const experimental::PostOpList<ITensorInfo * > & post_ops)2563     void set_post_ops(const experimental::PostOpList<ITensorInfo *> &post_ops)
2564     {
2565         _post_ops = post_ops;
2566     }
2567     /** Flag which specifies if the GEMM operation is running fixed-format kernels.
2568      *
2569      * @return True if the GEMM operation is running fixed-format kernel else false.
2570      */
fixed_format()2571     bool fixed_format() const
2572     {
2573         return _fixed_format;
2574     }
2575 
2576     /** Set fixed-format flag
2577      *
2578      * @param[in] fixed_format sets whether or not to use fixed-format kernels
2579      */
set_fixed_format(bool fixed_format)2580     void set_fixed_format(bool fixed_format)
2581     {
2582         _fixed_format = fixed_format;
2583     }
2584 
weight_format()2585     arm_compute::WeightFormat weight_format() const
2586     {
2587         return _weight_format;
2588     }
2589 
2590     /** Set weight format to be used
2591      *
2592      * @param[in] weight_format arm_compute::WeightFormat enumeration
2593      */
set_weight_format(arm_compute::WeightFormat weight_format)2594     void set_weight_format(arm_compute::WeightFormat weight_format)
2595     {
2596         _weight_format = weight_format;
2597     }
2598 
2599 private:
2600     bool                                    _is_a_reshaped;
2601     bool                                    _is_b_reshaped;
2602     bool                                    _reshape_b_only_on_first_run;
2603     int                                     _depth_output_gemm3d;
2604     bool                                    _reinterpret_input_as_3d;
2605     bool                                    _retain_internal_weights;
2606     GEMMLowpOutputStageInfo                 _gemmlowp_output_stage;
2607     bool                                    _fast_math;
2608     bool                                    _fp_mixed_precision;
2609     bool                                    _broadcast_bias;
2610     bool                                    _pretranspose_A;
2611     bool                                    _pretranspose_B;
2612     ActivationLayerInfo                     _activation_info;
2613     experimental::PostOpList<ITensorInfo *> _post_ops;
2614     bool                                    _fixed_format;
2615     arm_compute::WeightFormat               _weight_format;
2616 };
2617 
2618 /** Winograd information */
2619 struct WinogradInfo
2620 {
2621     /** Default constructor
2622      *
2623      * @param[in] output_tile_sz Width and height of the output tile
2624      * @param[in] kernel_sz      Width and height of the kernel
2625      * @param[in] input_dims     Width and height of the input tensor before the convolution is applied
2626      * @param[in] conv_info      Convolution info (Pads, strides)
2627      * @param[in] data_layout    Data layout to use for the output tensor once the convolution has been applied
2628      */
WinogradInfoWinogradInfo2629     WinogradInfo(Size2D output_tile_sz, Size2D kernel_sz, Size2D input_dims, PadStrideInfo conv_info, DataLayout data_layout)
2630         : output_tile_size(output_tile_sz), kernel_size(kernel_sz), input_dimensions(input_dims), convolution_info(conv_info), output_data_layout(data_layout)
2631     {
2632     }
2633 
2634     Size2D        output_tile_size{};                     /**< Width and height of the output tile */
2635     Size2D        kernel_size{};                          /**< Width and height of the kernel*/
2636     Size2D        input_dimensions{};                     /**< Width and height of the input tensor before the convolution is applied */
2637     PadStrideInfo convolution_info{};                     /**< Convolution info (Pads, strides,...) */
2638     DataLayout    output_data_layout{ DataLayout::NCHW }; /**< Data layout to use for the output tensor once the convolution has been applied (NCHW or NHWC) */
2639 };
2640 
/** IO formatting information class */
struct IOFormatInfo
{
    /** Precision type used when printing floating point numbers */
    enum class PrecisionType
    {
        Default, /**< Default precision to the one that the current stream has */
        Custom,  /**< Custom precision specified by the user using the precision parameter */
        Full     /**< The maximum precision of the floating point representation */
    };

    /** Specifies the area to be printed, used by Tensor objects */
    enum class PrintRegion
    {
        ValidRegion, /**< Prints the valid region of the Tensor object */
        NoPadding,   /**< Prints the Tensor object without the padding */
        Full         /**< Print the tensor object including padding */
    };

    /** Construct a set of IO formatting information.
     *
     * @param[in] print_region   Area to be printed. Used by Tensor objects. Default: ValidRegion.
     * @param[in] precision_type Precision type for floating point numbers. Default: stream default.
     * @param[in] precision      Precision value for float point numbers. Default: 10.
     * @param[in] align_columns  Whether to align columns when printed. Default: true.
     * @param[in] element_delim  Delimiter between elements. Default: " ".
     * @param[in] row_delim      Delimiter between rows. Default: "\n".
     */
    IOFormatInfo(PrintRegion   print_region   = PrintRegion::ValidRegion,
                 PrecisionType precision_type = PrecisionType::Default,
                 unsigned int  precision      = 10,
                 bool          align_columns  = true,
                 std::string   element_delim  = " ",
                 std::string   row_delim      = "\n")
        : print_region(print_region),
          precision_type(precision_type),
          precision(precision),
          // The string parameters are taken by value, so they are already private copies:
          // move them into the members instead of copying a second time.
          element_delim(std::move(element_delim)),
          row_delim(std::move(row_delim)),
          align_columns(align_columns)
    {
    }

    /** Area to be printed by Tensor objects */
    PrintRegion print_region;
    /** Floating point precision type */
    PrecisionType precision_type;
    /** Floating point precision */
    unsigned int precision;
    /** Element delimiter */
    std::string element_delim;
    /** Row delimiter */
    std::string row_delim;
    /** Align columns */
    bool align_columns;
};
2697 } // namespace arm_compute
2698 #endif /* ARM_COMPUTE_TYPES_H */
2699