/*
 * Copyright (c) 2021 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef ARM_COMPUTE_ACL_TYPES_H_
#define ARM_COMPUTE_ACL_TYPES_H_

#include <stddef.h>
#include <stdint.h>

/* `bool` is a keyword in C++ (and C23); earlier C dialects need <stdbool.h>. */
#ifndef __cplusplus
#include <stdbool.h>
#endif /* __cplusplus */

#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */

/** Opaque Context object */
typedef struct AclContext_ *AclContext;
/** Opaque Queue object */
typedef struct AclQueue_ *AclQueue;
/** Opaque Tensor object */
typedef struct AclTensor_ *AclTensor;
/** Opaque Tensor pack object */
typedef struct AclTensorPack_ *AclTensorPack;
/** Opaque Operator object */
typedef struct AclOperator_ *AclOperator;

/** Capabilities bitfield (Note: if multiple are enabled ComputeLibrary will pick the best possible) */
typedef uint64_t AclTargetCapabilities;

/** Error codes returned by the public entry-points
 *
 * The fixed underlying type is only spelled out where the language accepts it
 * (C++11 onwards and C23); older C dialects reject the `: type` clause, so
 * they get a plain enum with the same enumerator values.
 */
typedef enum AclStatus
#if defined(__cplusplus) || (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L)
    : int32_t
#endif
{
    AclSuccess            = 0, /**< Call succeeded, leading to valid state for all involved objects/data */
    AclRuntimeError       = 1, /**< Call failed during execution */
    AclOutOfMemory        = 2, /**< Call failed due to failure to allocate resources */
    AclUnimplemented      = 3, /**< Call failed as requested capability is not implemented */
    AclUnsupportedTarget  = 4, /**< Call failed as an invalid backend was requested */
    AclInvalidTarget      = 5, /**< Call failed as an invalid target was passed
                                    (NOTE(review): wording inferred from the enumerator name - confirm) */
    AclInvalidArgument    = 6, /**< Call failed as invalid argument was passed */
    AclUnsupportedConfig  = 7, /**< Call failed as configuration is unsupported */
    AclInvalidObjectState = 8, /**< Call failed as an object has invalid state */
} AclStatus;

/** Supported targets */
typedef enum AclTarget
{
    AclCpu    = 0, /**< Cpu target that uses SIMD extensions */
    AclGpuOcl = 1, /**< OpenCL target for GPU */
} AclTarget;

/** Execution mode types */
typedef enum AclExecutionMode
{
    AclPreferFastRerun = 0, /**< Prioritize performance when multiple iterations are performed */
    AclPreferFastStart = 1, /**< Prioritize performance when a single iteration is expected to be performed */
} AclExecutionMode;

/** Available CPU capabilities */
typedef enum AclCpuCapabilities
{
    AclCpuCapabilitiesAuto = 0, /**< Automatic discovery of capabilities */

    AclCpuCapabilitiesNeon = (1 << 0), /**< Enable NEON optimized paths */
    AclCpuCapabilitiesSve  = (1 << 1), /**< Enable SVE optimized paths */
    AclCpuCapabilitiesSve2 = (1 << 2), /**< Enable SVE2 optimized paths */
    // Reserve 3, 4, 5, 6

    AclCpuCapabilitiesFp16 = (1 << 7), /**< Enable float16 data-type support */
    AclCpuCapabilitiesBf16 = (1 << 8), /**< Enable bfloat16 data-type support */
    // Reserve 9, 10, 11, 12

    AclCpuCapabilitiesDot      = (1 << 13), /**< Enable paths that use the udot/sdot instructions */
    AclCpuCapabilitiesMmlaInt8 = (1 << 14), /**< Enable paths that use the mmla integer instructions */
    AclCpuCapabilitiesMmlaFp   = (1 << 15), /**< Enable paths that use the mmla float instructions */

    AclCpuCapabilitiesAll = ~0 /**< Enable all paths */
} AclCpuCapabilities;

/** Allocator interface that can be passed to a context */
typedef struct AclAllocator
{
    /** Allocate a block of size bytes of memory.
     *
     * @param[in] user_data User provided data that can be used by the allocator
     * @param[in] size      Size of the allocation
     *
     * @return A pointer to the allocated block if successful else NULL
     */
    void *(*alloc)(void *user_data, size_t size);
    /** Release a block of memory.
     *
     * @param[in] user_data User provided data that can be used by the allocator
     * @param[in] ptr       Pointer to the block to release
     */
    void (*free)(void *user_data, void *ptr);
    /** Allocate a block of size bytes of memory with a requested alignment.
     *
     * @param[in] user_data User provided data that can be used by the allocator
     * @param[in] size      Size of the allocation
     * @param[in] alignment Requested alignment of the allocation (presumably in bytes - confirm)
     *
     * @return A pointer to the allocated block if successful else NULL
     */
    void *(*aligned_alloc)(void *user_data, size_t size, size_t alignment);
    /** Release a block of memory (presumably one obtained through aligned_alloc - confirm).
     *
     * @param[in] user_data User provided data that can be used by the allocator
     * @param[in] ptr       Pointer to the block to release
     */
    void (*aligned_free)(void *user_data, void *ptr);

    /** User provided information */
    void *user_data;
} AclAllocator;

/** Context options */
typedef struct AclContextOptions
{
    AclExecutionMode      mode;               /**< Execution mode to use */
    AclTargetCapabilities capabilities;       /**< Target capabilities */
    bool                  enable_fast_math;   /**< Allow precision loss */
    const char           *kernel_config_file; /**< Kernel configuration file */
    int32_t               max_compute_units;  /**< Max compute units that can be used by a queue created from the context.
                                                   If <=0 the system will use the hw concurrency instead */
    AclAllocator         *allocator;          /**< Allocator to be used by all the memory internally */
} AclContextOptions;

/** Supported tuning modes */
typedef enum
{
    AclTuningModeNone = 0, /**< No tuning */
    AclRapid          = 1, /**< Fast tuning mode, testing a small portion of the tuning space */
    AclNormal         = 2, /**< Normal tuning mode, gives a good balance between tuning time and performance */
    AclExhaustive     = 3, /**< Exhaustive tuning mode, increased tuning time but with best results */
} AclTuningMode;

/** Queue options */
typedef struct
{
    AclTuningMode mode;          /**< Tuning mode */
    int32_t       compute_units; /**< Compute Units that the queue will deploy */
} AclQueueOptions;

/** Supported data types */
typedef enum AclDataType
{
    AclDataTypeUnknown = 0, /**< Unknown data type */
    AclUInt8           = 1, /**< 8-bit unsigned integer */
    AclInt8            = 2, /**< 8-bit signed integer */
    AclUInt16          = 3, /**< 16-bit unsigned integer */
    AclInt16           = 4, /**< 16-bit signed integer */
    AclUint32          = 5, /**< 32-bit unsigned integer */
    AclInt32           = 6, /**< 32-bit signed integer */
    AclFloat16         = 7, /**< 16-bit floating point */
    AclBFloat16        = 8, /**< 16-bit brain floating point */
    AclFloat32         = 9, /**< 32-bit floating point */
} AclDataType;

/** Supported data layouts for operations */
typedef enum AclDataLayout
{
    AclDataLayoutUnknown = 0, /**< Unknown data layout */
    AclNhwc              = 1, /**< Native, performant, Compute Library data layout */
    AclNchw              = 2, /**< Data layout where width is the fastest changing dimension */
} AclDataLayout;

/** Type of memory to be imported */
typedef enum AclImportMemoryType
{
    AclHostPtr = 0 /**< Host allocated memory */
} AclImportMemoryType;

/** Tensor Descriptor */
typedef struct AclTensorDescriptor
{
    int32_t     ndims;     /**< Number of dimensions */
    int32_t    *shape;     /**< Tensor Shape */
    AclDataType data_type; /**< Tensor Data type */
    int64_t    *strides;   /**< Strides on each dimension. Linear memory is assumed if this is a null pointer */
    int64_t     boffset;   /**< Offset in terms of bytes for the first element */
} AclTensorDescriptor;

/** Slot type of a tensor */
typedef enum
{
    AclSlotUnknown = -1,
    AclSrc         = 0, /**< Same value as AclSrc0 */
    AclSrc0        = 0,
    AclSrc1        = 1,
    AclDst         = 30,
    AclSrcVec      = 256,
} AclTensorSlot;

#ifdef __cplusplus
}
#endif /* __cplusplus */
#endif /* ARM_COMPUTE_ACL_TYPES_H_ */