• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef ARM_COMPUTE_ACL_TYPES_H_
25 #define ARM_COMPUTE_ACL_TYPES_H_
26 
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
29 
30 #ifdef __cplusplus
31 extern "C" {
32 #endif /* __cplusplus */
33 
/* Opaque handle types: each one is a pointer to a struct tag that is declared
 * but not defined in this header. */
typedef struct AclContext_    *AclContext;    /**< Opaque Context object */
typedef struct AclQueue_      *AclQueue;      /**< Opaque Queue object */
typedef struct AclTensor_     *AclTensor;     /**< Opaque Tensor object */
typedef struct AclTensorPack_ *AclTensorPack; /**< Opaque Tensor pack object */
typedef struct AclOperator_   *AclOperator;   /**< Opaque Operator object */
44 
/** Capabilities bitfield
 *
 * @note If multiple capabilities are enabled ComputeLibrary will pick the best possible
 */
typedef uint64_t AclTargetCapabilities;
47 
/** Error codes returned by the public entry-points
 *
 * A fixed underlying type (`enum X : T`) is C++11 syntax and is rejected by
 * C99/C11 compilers, so it is only requested when this header is compiled as
 * C++. In C the enumeration is declared without it; all enumerators fit in an
 * int32_t either way.
 */
#ifdef __cplusplus
typedef enum AclStatus : int32_t
#else  /* __cplusplus */
typedef enum AclStatus
#endif /* __cplusplus */
{
    AclSuccess            = 0, /**< Call succeeded, leading to valid state for all involved objects/data */
    AclRuntimeError       = 1, /**< Call failed during execution */
    AclOutOfMemory        = 2, /**< Call failed due to failure to allocate resources */
    AclUnimplemented      = 3, /**< Call failed as requested capability is not implemented */
    AclUnsupportedTarget  = 4, /**< Call failed as an unsupported backend was requested */
    AclInvalidTarget      = 5, /**< Call failed as an invalid backend was requested */
    AclInvalidArgument    = 6, /**< Call failed as invalid argument was passed */
    AclUnsupportedConfig  = 7, /**< Call failed as configuration is unsupported */
    AclInvalidObjectState = 8, /**< Call failed as an object has invalid state */
} AclStatus;
61 
/** Supported targets */
typedef enum AclTarget
{
    /** Cpu target that uses SIMD extensions */
    AclCpu = 0,
    /** OpenCL target for GPU */
    AclGpuOcl = 1,
} AclTarget;
68 
/** Execution mode types */
typedef enum AclExecutionMode
{
    /** Prioritize performance when multiple iterations are performed */
    AclPreferFastRerun = 0,
    /** Prioritize performance when a single iteration is expected to be performed */
    AclPreferFastStart = 1,
} AclExecutionMode;
75 
/** Available CPU capabilities
 *
 * Apart from Auto (no bit set) and All (every bit set), each enumerator
 * occupies exactly one bit so that values can be OR-ed together.
 */
typedef enum AclCpuCapabilities
{
    /** Automatic discovery of capabilities */
    AclCpuCapabilitiesAuto = 0,

    /** Enable NEON optimized paths */
    AclCpuCapabilitiesNeon = 1 << 0,
    /** Enable SVE optimized paths */
    AclCpuCapabilitiesSve = 1 << 1,
    /** Enable SVE2 optimized paths */
    AclCpuCapabilitiesSve2 = 1 << 2,
    /* Bits 3, 4, 5, 6 are reserved */

    /** Enable float16 data-type support */
    AclCpuCapabilitiesFp16 = 1 << 7,
    /** Enable bfloat16 data-type support */
    AclCpuCapabilitiesBf16 = 1 << 8,
    /* Bits 9, 10, 11, 12 are reserved */

    /** Enable paths that use the udot/sdot instructions */
    AclCpuCapabilitiesDot = 1 << 13,
    /** Enable paths that use the mmla integer instructions */
    AclCpuCapabilitiesMmlaInt8 = 1 << 14,
    /** Enable paths that use the mmla float instructions */
    AclCpuCapabilitiesMmlaFp = 1 << 15,

    /** Enable all paths (every bit set) */
    AclCpuCapabilitiesAll = ~0
} AclCpuCapabilities;
96 
/** Allocator interface that can be passed to a context
 *
 * A table of four callbacks plus an opaque user pointer. The user_data member
 * is presumably what gets forwarded as the first argument of every callback;
 * confirm against the implementation.
 */
typedef struct AclAllocator
{
    /** Allocate a block of size bytes of memory.
     *
     * @param[in] user_data User provided data that can be used by the allocator
     * @param[in] size      Size of the allocation
     *
     * @return A pointer to the allocated block if successful else NULL
     */
    void *(*alloc)(void *user_data, size_t size);
    /** Release a block of memory.
     *
     * @param[in] user_data User provided data that can be used by the allocator
     * @param[in] ptr       Pointer to the block to release
     */
    void (*free)(void *user_data, void *ptr);
    /** Allocate a block of size bytes of memory with a requested alignment.
     *
     * @param[in] user_data User provided data that can be used by the allocator
     * @param[in] size      Size of the allocation
     * @param[in] alignment Alignment requested for the allocation
     *
     * @return A pointer to the allocated block if successful else NULL
     */
    void *(*aligned_alloc)(void *user_data, size_t size, size_t alignment);
    /** Release a block of memory (counterpart of aligned_alloc).
     *
     * @param[in] user_data User provided data that can be used by the allocator
     * @param[in] ptr       Pointer to the block to release
     */
    void (*aligned_free)(void *user_data, void *ptr);

    /** User provided information */
    void *user_data;
} AclAllocator;
132 
133 /**< Context options */
134 typedef struct AclContextOptions
135 {
136     AclExecutionMode      mode;               /**< Execution mode to use */
137     AclTargetCapabilities capabilities;       /**< Target capabilities */
138     bool                  enable_fast_math;   /**< Allow precision loss */
139     const char           *kernel_config_file; /**< Kernel cofiguration file */
140     int32_t               max_compute_units;  /**< Max compute units that can be used by a queue created from the context.
141                                                    If <=0 the system will use the hw concurency insted */
142     AclAllocator         *allocator;          /**< Allocator to be used by all the memory internally */
143 } AclContextOptions;
144 
/** Supported tuning modes */
typedef enum
{
    /** No tuning */
    AclTuningModeNone = 0,
    /** Fast tuning mode, testing a small portion of the tuning space */
    AclRapid = 1,
    /** Normal tuning mode, gives a good balance between tuning time and performance */
    AclNormal = 2,
    /** Exhaustive tuning mode, increased tuning time but with best results */
    AclExhaustive = 3,
} AclTuningMode;
153 
154 /**< Queue options */
155 typedef struct
156 {
157     AclTuningMode mode;          /**< Tuning mode */
158     int32_t       compute_units; /**< Compute Units that the queue will deploy */
159 } AclQueueOptions;
160 
/** Supported data types */
typedef enum AclDataType
{
    /** Unknown data type */
    AclDataTypeUnknown = 0,
    /** 8-bit unsigned integer */
    AclUInt8 = 1,
    /** 8-bit signed integer */
    AclInt8 = 2,
    /** 16-bit unsigned integer */
    AclUInt16 = 3,
    /** 16-bit signed integer */
    AclInt16 = 4,
    /** 32-bit unsigned integer (note the lowercase 'i', unlike AclUInt8/AclUInt16;
     *  the spelling is part of the public interface and is kept as is) */
    AclUint32 = 5,
    /** 32-bit signed integer */
    AclInt32 = 6,
    /** 16-bit floating point */
    AclFloat16 = 7,
    /** 16-bit brain floating point */
    AclBFloat16 = 8,
    /** 32-bit floating point */
    AclFloat32 = 9,
} AclDataType;
175 
/** Supported data layouts for operations */
typedef enum AclDataLayout
{
    /** Unknown data layout */
    AclDataLayoutUnknown = 0,
    /** Native, performant, Compute Library data layout */
    AclNhwc = 1,
    /** Data layout where width is the fastest changing dimension */
    AclNchw = 2,
} AclDataLayout;
183 
/** Type of memory to be imported */
typedef enum AclImportMemoryType
{
    /** Host allocated memory */
    AclHostPtr = 0
} AclImportMemoryType;
189 
190 /**< Tensor Descriptor */
191 typedef struct AclTensorDescriptor
192 {
193     int32_t     ndims;     /**< Number or dimensions */
194     int32_t    *shape;     /**< Tensor Shape */
195     AclDataType data_type; /**< Tensor Data type */
196     int64_t    *strides;   /**< Strides on each dimension. Linear memory is assumed if nullptr */
197     int64_t     boffset;   /**< Offset in terms of bytes for the first element */
198 } AclTensorDescriptor;
199 
/** Slot type of a tensor
 *
 * AclSrc and AclSrc0 deliberately share the value 0, so the two names are
 * interchangeable.
 */
typedef enum
{
    /** Unknown slot (the only negative value) */
    AclSlotUnknown = -1,
    /** Same value as AclSrc0 */
    AclSrc = 0,
    /** Same value as AclSrc */
    AclSrc0 = 0,
    AclSrc1 = 1,
    AclDst = 30,
    AclSrcVec = 256,
} AclTensorSlot;
210 
211 #ifdef __cplusplus
212 }
213 #endif /* __cplusplus */
214 #endif /* ARM_COMPUTE_ACL_TYPES_H_ */
215