// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#include <pthreadpool.h>

#ifdef __cplusplus
extern "C" {
#endif

/// The number of bytes XNNPACK may read beyond array bounds.
/// The caller must allocate at least this many extra bytes after the tensor data passed to XNNPACK.
///
/// Note: XNNPACK reads, but never writes beyond array bounds.
#define XNN_EXTRA_BYTES 16

/// Maximum number of dimensions in tensor shape.
#define XNN_MAX_TENSOR_DIMS 6

/// Allow sparse inference in a Runtime.
///
/// Note: this flag hints XNNPACK to consider sparse inference, but does not guarantee it.
#define XNN_FLAG_SPARSE_INFERENCE 0x00000001
#define XNN_FLAG_HINT_SPARSE_INFERENCE XNN_FLAG_SPARSE_INFERENCE

/// Allow IEEE FP16 inference in a Runtime.
///
/// Note: this flag hints XNNPACK to consider IEEE FP16 inference, but does not guarantee it.
#define XNN_FLAG_FP16_INFERENCE 0x00000002
#define XNN_FLAG_HINT_FP16_INFERENCE XNN_FLAG_FP16_INFERENCE

/// Force IEEE FP16 inference in a Runtime, and fail if FP16 inference is not possible.
///
/// Note: this flag guarantees that XNNPACK will use IEEE FP16 inference, or fail to create the Runtime object.
/// Warning: on x86 systems FP16 computations will be emulated at a substantial performance cost.
#define XNN_FLAG_FORCE_FP16_INFERENCE 0x00000004

/// Enable timing of each operator's runtime.
#define XNN_FLAG_BASIC_PROFILING 0x00000008

/// The convolution operator represents a depthwise convolution, and uses HWGo layout for filters.
#define XNN_FLAG_DEPTHWISE_CONVOLUTION 0x00000001

// Note: the operator-specific flags below are scoped to the operators that accept them, so numerically
// equal values (e.g. 0x00000004) intentionally carry different meanings for different operators.

/// Assume transposed weights in a fully connected operator.
#define XNN_FLAG_TRANSPOSE_WEIGHTS 0x00000001

/// The operator assumes NHWC layout for the input, regardless of the output layout.
#define XNN_FLAG_INPUT_NHWC 0x00000002

/// Match "SAME" padding in TensorFlow. Exact padding values are computed dynamically depending on input size.
#define XNN_FLAG_TENSORFLOW_SAME_PADDING 0x00000004

/// Implicitly flatten and reshape input of a Fully Connected operator into a 2D tensor.
#define XNN_FLAG_TENSORFLOW_RESHAPE_2D 0x00000004

/// Match behaviour of TensorFlow 1.x.
#define XNN_FLAG_TENSORFLOW_LEGACY_MODE 0x00000004

/// Static weights of the FP16 operator are in FP32 format.
#define XNN_FLAG_FP32_STATIC_WEIGHTS 0x00000008

/// Align corners of input and output images in resize operations.
#define XNN_FLAG_ALIGN_CORNERS 0x00000008

/// Yield worker threads of the thread pool to the system scheduler after the inference.
#define XNN_FLAG_YIELD_WORKERS 0x00000010

/// Status code for any XNNPACK function call.
enum xnn_status {
  /// The call succeeded, and all output arguments now contain valid data.
  xnn_status_success = 0,
  xnn_status_uninitialized = 1,
  xnn_status_invalid_parameter = 2,
  xnn_status_invalid_state = 3,
  xnn_status_unsupported_parameter = 4,
  xnn_status_unsupported_hardware = 5,
  xnn_status_out_of_memory = 6,
};

struct xnn_allocator {
  /// User-specified pointer that will be passed as-is to all functions in this structure.
  void* context;
  /// Pointer to a function to be called for general memory allocation.
  ///
  /// @param context - The user-specified pointer from xnn_allocator structure.
  /// @param size - The size of the memory block to allocate, in bytes.
  ///
  /// @returns Pointer to the allocated memory block of at least @ref size bytes.
  ///          If allocation fails, the function must return NULL.
  void* (*allocate)(void* context, size_t size);
  /// Pointer to a function to be called for general memory re-allocation, i.e. to increase or shrink a previously
  /// allocated memory block. The content of the old memory block is copied to the new memory block.
  ///
  /// @param context - The user-specified pointer from xnn_allocator structure.
  /// @param pointer - Pointer to a memory block allocated by @ref allocate or @ref reallocate functions. Can be NULL.
  ///                  If the pointer is NULL, the @ref reallocate call is equivalent to an @ref allocate call.
  /// @param size - The new size of the memory block to allocate, in bytes.
  ///
  /// @returns Pointer to the newly allocated memory block of at least @ref size bytes with the content of the previous
  ///          memory block.
  ///          If allocation fails, the function must return NULL, but must not release the previous memory block.
  void* (*reallocate)(void* context, void* pointer, size_t size);
  /// Pointer to a function to be called for general memory de-allocation.
  ///
  /// @param context - The user-specified pointer from xnn_allocator structure.
  /// @param pointer - Pointer to a memory block allocated by @ref allocate or @ref reallocate functions. Can be NULL.
  ///                  If the pointer is NULL, the @ref deallocate call is a no-op.
  void (*deallocate)(void* context, void* pointer);
  /// Pointer to a function to be called for aligned memory allocation.
  ///
  /// @param context - The user-specified pointer from xnn_allocator structure.
  /// @param alignment - The alignment of the memory block to allocate, in bytes. Alignment is always a power-of-2.
  /// @param size - The size of the memory block to allocate, in bytes.
  ///
  /// @returns Pointer to the allocated memory block of at least @ref size bytes.
  ///          If allocation fails, the function must return NULL.
  void* (*aligned_allocate)(void* context, size_t alignment, size_t size);
  /// Pointer to a function to be called for aligned memory de-allocation.
  ///
  /// @param context - The user-specified pointer from xnn_allocator structure.
  /// @param pointer - Pointer to a memory block allocated by @ref aligned_allocate function. Can be NULL.
  ///                  If the pointer is NULL, the @ref aligned_deallocate call is a no-op.
  void (*aligned_deallocate)(void* context, void* pointer);
};

/// Initialize XNNPACK library.
///
/// XNNPACK must be successfully initialized before use. During initialization, XNNPACK populates internal structures
/// depending on the host processor. Initialization can be time-consuming.
///
/// @param[in] allocator - structure with function pointers to be used for memory allocation and de-allocation.
///                        If this argument is NULL, system-provided memory management functions (e.g. malloc/free)
///                        will be used.
///
/// @retval xnn_status_success - XNNPACK is successfully initialized and ready to use.
/// @retval xnn_status_out_of_memory - initialization failed due to out-of-memory condition.
/// @retval xnn_status_unsupported_hardware - initialization failed because the host processor does not satisfy the
///                                           minimum hardware requirements for XNNPACK. E.g. this may happen on x86
///                                           processors without SSE2 extension, or on 32-bit ARM processors without
///                                           the NEON SIMD extension.
enum xnn_status xnn_initialize(const struct xnn_allocator* allocator);

/// Deinitialize XNNPACK library.
///
/// To avoid memory and resource leaks, users must call xnn_deinitialize once for each successful xnn_initialize call.
///
/// @retval xnn_status_success - deinitialization call succeeded.
enum xnn_status xnn_deinitialize(void);

/// Subgraph is an abstract representation of a neural network model.
/// Subgraph objects are used to define Values (tensors) and Nodes (operators) comprising the model.
typedef struct xnn_subgraph* xnn_subgraph_t;

/// Create an empty Subgraph object.
///
/// @param external_value_ids - number of Value IDs to reserve for communication with external graph representation.
///                             The Subgraph object would avoid creating internal Value IDs in the
///                             [0, external_value_ids-1] range.
/// @param flags - binary features of the subgraph. No supported flags are currently defined.
/// @param subgraph_out - pointer to the variable that will be initialized with a handle to the Subgraph object upon
///                       successful return.
enum xnn_status xnn_create_subgraph(
  uint32_t external_value_ids,
  uint32_t flags,
  xnn_subgraph_t* subgraph_out);

/// Destroy a Subgraph object, as well as Values, and Nodes associated with the subgraph.
///
/// @param subgraph - the Subgraph object to destroy.
enum xnn_status xnn_delete_subgraph(
  xnn_subgraph_t subgraph);

#define XNN_VALUE_FLAG_EXTERNAL_INPUT 0x00000001
#define XNN_VALUE_FLAG_EXTERNAL_OUTPUT 0x00000002

#define XNN_INVALID_VALUE_ID UINT32_MAX

/// Type of elements in a Value object.
enum xnn_datatype {
  /// Invalid data type. Valid Values never have this datatype.
  xnn_datatype_invalid = 0,
  /// IEEE754 single-precision floating-point.
  xnn_datatype_fp32 = 1,
  /// IEEE754 half-precision floating-point.
  xnn_datatype_fp16 = 2,
  /// Quantized 8-bit signed integer with shared per-Value quantization parameters.
  xnn_datatype_qint8 = 3,
  /// Quantized 8-bit unsigned integer with shared per-Value quantization parameters.
  xnn_datatype_quint8 = 4,
  /// Quantized 32-bit signed integer with shared per-Value quantization parameters.
  xnn_datatype_qint32 = 5,
  /// Quantized 8-bit signed integer with shared per-channel quantization parameters.
  xnn_datatype_qcint8 = 6,
  /// Quantized 32-bit signed integer with shared per-channel quantization parameters.
  xnn_datatype_qcint32 = 7,
};

/// Define a tensor-type Value and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Value.
/// @param datatype - type of the tensor elements.
/// @param num_dims - number of dimensions in the shape.
/// @param dims - pointer to an array of @a num_dims shape dimensions. If num_dims is 0, this pointer can be NULL.
///               XNNPACK does not keep any pointers to this array after the function returns.
/// @param data - pointer to static data used for tensor initialization. If the tensor is not statically initialized,
///               this pointer must be NULL. If non-NULL, the life-time of the static data must exceed the life-time
///               of the Subgraph object, and of any Runtime objects created from the Subgraph.
/// @param external_id - external ID for the Value. The ID must be within the range of reserved Value IDs specified on
///                      the Subgraph creation. If the external ID is XNN_INVALID_VALUE_ID, an internal ID will be
///                      created for the Value.
/// @param flags - binary features of the Value. Supported values are any combination of XNN_VALUE_FLAG_EXTERNAL_INPUT
///                and XNN_VALUE_FLAG_EXTERNAL_OUTPUT.
/// @param id_out - pointer to the variable that will be initialized with the Value ID upon successful return. If a
///                 valid @a external_id was provided, the variable will be initialized with the @a external_id value.
enum xnn_status xnn_define_tensor_value(
  xnn_subgraph_t subgraph,
  enum xnn_datatype datatype,
  size_t num_dims,
  const size_t* dims,
  const void* data,
  uint32_t external_id,
  uint32_t flags,
  uint32_t* id_out);

/// Define a quantized tensor-type Value and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Value.
/// @param datatype - type of the tensor elements.
/// @param zero_point - offset from zero to subtract from the quantized elements in the Value.
/// @param scale - multiplication factor to convert quantized elements to real representation.
/// @param num_dims - number of dimensions in the shape.
/// @param dims - pointer to an array of @a num_dims shape dimensions. If num_dims is 0, this pointer can be NULL.
///               XNNPACK does not keep any pointers to this array after the function returns.
/// @param data - pointer to static data used for tensor initialization. If the tensor is not statically initialized,
///               this pointer must be NULL. If non-NULL, the life-time of the static data must exceed the life-time
///               of the Subgraph object, and of any Runtime objects created from the Subgraph.
/// @param external_id - external ID for the Value. The ID must be within the range of reserved Value IDs specified on
///                      the Subgraph creation. If the external ID is XNN_INVALID_VALUE_ID, an internal ID will be
///                      created for the Value.
/// @param flags - binary features of the Value. Supported values are any combination of XNN_VALUE_FLAG_EXTERNAL_INPUT
///                and XNN_VALUE_FLAG_EXTERNAL_OUTPUT.
/// @param id_out - pointer to the variable that will be initialized with the Value ID upon successful return. If a
///                 valid @a external_id was provided, the variable will be initialized with the @a external_id value.
enum xnn_status xnn_define_quantized_tensor_value(
  xnn_subgraph_t subgraph,
  enum xnn_datatype datatype,
  int32_t zero_point,
  float scale,
  size_t num_dims,
  const size_t* dims,
  const void* data,
  uint32_t external_id,
  uint32_t flags,
  uint32_t* id_out);

/// Define a channelwise quantized tensor-type Value and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Value.
/// @param datatype - type of the tensor elements.
/// @param scale - per-channel multiplication factors to convert quantized elements to real representation.
/// @param num_dims - number of dimensions in the shape.
/// @param channel_dim - index of the channel dimension in the tensor with per-channel quantization parameters.
///                      Typically this is the first dimension (dimension #0) of the filter tensors in the Convolution,
///                      Deconvolution, and Fully Connected operators and the last dimension of the filter tensors in
///                      the Depthwise Convolution operators.
/// @param dims - pointer to an array of @a num_dims shape dimensions. If num_dims is 0, this pointer can be NULL.
///               XNNPACK does not keep any pointers to this array after the function returns.
/// @param data - pointer to static data used for tensor initialization. If the tensor is not statically initialized,
///               this pointer must be NULL. If non-NULL, the life-time of the static data must exceed the life-time
///               of the Subgraph object, and of any Runtime objects created from the Subgraph.
/// @param external_id - external ID for the Value. The ID must be within the range of reserved Value IDs specified on
///                      the Subgraph creation. If the external ID is XNN_INVALID_VALUE_ID, an internal ID will be
///                      created for the Value.
/// @param flags - binary features of the Value. Supported values are any combination of XNN_VALUE_FLAG_EXTERNAL_INPUT
///                and XNN_VALUE_FLAG_EXTERNAL_OUTPUT.
/// @param id_out - pointer to the variable that will be initialized with the Value ID upon successful return. If a
///                 valid @a external_id was provided, the variable will be initialized with the @a external_id value.
enum xnn_status xnn_define_channelwise_quantized_tensor_value(
  xnn_subgraph_t subgraph,
  enum xnn_datatype datatype,
  const float* scale,
  size_t num_dims,
  size_t channel_dim,
  const size_t* dims,
  const void* data,
  uint32_t external_id,
  uint32_t flags,
  uint32_t* id_out);

/// Define a Convert Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
///                    shape must match the shape of the input tensor.
/// @param flags - binary features of the Convert Node. No supported flags are currently defined.
enum xnn_status xnn_define_convert(
  xnn_subgraph_t subgraph,
  uint32_t input_id,
  uint32_t output_id,
  uint32_t flags);

/// Define a 2D Convolution Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param input_padding_top - implicit zero-padding above 2D input data. Must be 0 if XNN_FLAG_TENSORFLOW_SAME_PADDING
///                            flag is specified.
/// @param input_padding_right - implicit zero-padding to the right of 2D input data. Must be 0 if
///                              XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
/// @param input_padding_bottom - implicit zero-padding below 2D input data. Must be 0 if
///                               XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
/// @param input_padding_left - implicit zero-padding to the left of 2D input data. Must be 0 if
///                             XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
/// @param kernel_height - kernel (filter) height.
/// @param kernel_width - kernel (filter) width.
/// @param subsampling_height - height of subsampling region for convolution output (convolution height stride).
/// @param subsampling_width - width of subsampling region for convolution output (convolution width stride).
/// @param dilation_height - dilation of kernel elements along the height dimension.
/// @param dilation_width - dilation of kernel elements along the width dimension.
/// @param groups - number of convolution groups.
/// @param group_input_channels - number of input channels per group.
/// @param group_output_channels - number of output channels per group.
/// @param output_min - lower bound for clipping output values.
/// @param output_max - upper bound for clipping output values.
/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
///                   with [N, IH, IW, groups * group_input_channels] dimensions.
/// @param filter_id - Value ID for the filter tensor. The filter tensor must be a 4D tensor defined in the @a subgraph
///                    with [groups * group_output_channels, kernel_height, kernel_width, group_input_channels]
///                    dimensions.
/// @param bias_id - Value ID for the bias tensor, or XNN_INVALID_VALUE_ID for a 2D Convolution Node without a bias. If
///                  present, the bias tensor must be a 1D tensor defined in the @a subgraph with [groups *
///                  group_output_channels] dimensions.
/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
///                    with [N, OH, OW, groups * group_output_channels] dimensions.
/// @param flags - binary features of the 2D Convolution Node. The only currently supported value is
///                XNN_FLAG_TENSORFLOW_SAME_PADDING.
enum xnn_status xnn_define_convolution_2d(
  xnn_subgraph_t subgraph,
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t kernel_height,
  uint32_t kernel_width,
  uint32_t subsampling_height,
  uint32_t subsampling_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  uint32_t groups,
  size_t group_input_channels,
  size_t group_output_channels,
  float output_min,
  float output_max,
  uint32_t input_id,
  uint32_t filter_id,
  uint32_t bias_id,
  uint32_t output_id,
  uint32_t flags);

/// Define a 2D Deconvolution (Transposed Convolution) Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param padding_top - implicit padding above 2D output data.
/// @param padding_right - implicit padding to the right of 2D output data.
/// @param padding_bottom - implicit padding below 2D output data.
/// @param padding_left - implicit padding to the left of 2D output data.
/// @param adjustment_height - additional elements in the bottom of the 2D output data.
/// @param adjustment_width - additional elements to the right of the 2D output data.
/// @param kernel_height - kernel (filter) height.
/// @param kernel_width - kernel (filter) width.
/// @param upsampling_height - height of upsampling region for deconvolution input (deconvolution height stride).
/// @param upsampling_width - width of upsampling region for deconvolution input (deconvolution width stride).
/// @param dilation_height - dilation of kernel elements along the height dimension.
/// @param dilation_width - dilation of kernel elements along the width dimension.
/// @param groups - number of convolution groups.
/// @param group_input_channels - number of input channels per group.
/// @param group_output_channels - number of output channels per group.
/// @param output_min - lower bound for clipping output values.
/// @param output_max - upper bound for clipping output values.
/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
///                   with [N, IH, IW, groups * group_input_channels] dimensions.
/// @param filter_id - Value ID for the filter tensor. The filter tensor must be a 4D tensor defined in the @a subgraph
///                    with [groups * group_output_channels, kernel_height, kernel_width, group_input_channels]
///                    dimensions.
/// @param bias_id - Value ID for the bias tensor, or XNN_INVALID_VALUE_ID for a 2D Deconvolution Node without a bias.
///                  If present, the bias tensor must be a 1D tensor defined in the @a subgraph with
///                  [groups * group_output_channels] dimensions.
/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
///                    with [N, OH, OW, groups * group_output_channels] dimensions.
/// @param flags - binary features of the 2D Deconvolution Node. No supported flags are currently defined.
enum xnn_status xnn_define_deconvolution_2d(
  xnn_subgraph_t subgraph,
  uint32_t padding_top,
  uint32_t padding_right,
  uint32_t padding_bottom,
  uint32_t padding_left,
  uint32_t adjustment_height,
  uint32_t adjustment_width,
  uint32_t kernel_height,
  uint32_t kernel_width,
  uint32_t upsampling_height,
  uint32_t upsampling_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  uint32_t groups,
  size_t group_input_channels,
  size_t group_output_channels,
  float output_min,
  float output_max,
  uint32_t input_id,
  uint32_t filter_id,
  uint32_t bias_id,
  uint32_t output_id,
  uint32_t flags);

/// Define a 2D Depthwise Convolution Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param input_padding_top - implicit zero-padding above 2D input data. Must be 0 if XNN_FLAG_TENSORFLOW_SAME_PADDING
///                            flag is specified.
/// @param input_padding_right - implicit zero-padding to the right of 2D input data. Must be 0 if
///                              XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
/// @param input_padding_bottom - implicit zero-padding below 2D input data. Must be 0 if
///                               XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
/// @param input_padding_left - implicit zero-padding to the left of 2D input data. Must be 0 if
///                             XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
/// @param kernel_height - kernel (filter) height.
/// @param kernel_width - kernel (filter) width.
/// @param subsampling_height - height of subsampling region for convolution output (convolution height stride).
/// @param subsampling_width - width of subsampling region for convolution output (convolution width stride).
/// @param dilation_height - dilation of kernel elements along the height dimension.
/// @param dilation_width - dilation of kernel elements along the width dimension.
/// @param depth_multiplier - ratio of output channels to input channels.
/// @param input_channels - number of input channels.
/// @param output_min - lower bound for clipping output values.
/// @param output_max - upper bound for clipping output values.
/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
///                   with [N, IH, IW, input_channels] dimensions.
/// @param filter_id - Value ID for the filter tensor. The filter tensor must be a 4D tensor defined in the @a subgraph
///                    with [1, kernel_height, kernel_width, input_channels * depth_multiplier] dimensions.
/// @param bias_id - Value ID for the bias tensor, or XNN_INVALID_VALUE_ID for a 2D Depthwise Convolution Node without
///                  a bias. If present, the bias tensor must be a 1D tensor defined in the @a subgraph with
///                  [input_channels * depth_multiplier] dimensions.
/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
///                    with [N, OH, OW, input_channels * depth_multiplier] dimensions.
/// @param flags - binary features of the 2D Depthwise Convolution Node. The only currently supported value is
///                XNN_FLAG_TENSORFLOW_SAME_PADDING.
enum xnn_status xnn_define_depthwise_convolution_2d(
  xnn_subgraph_t subgraph,
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t kernel_height,
  uint32_t kernel_width,
  uint32_t subsampling_height,
  uint32_t subsampling_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  uint32_t depth_multiplier,
  size_t input_channels,
  float output_min,
  float output_max,
  uint32_t input_id,
  uint32_t filter_id,
  uint32_t bias_id,
  uint32_t output_id,
  uint32_t flags);

/// Define a Depth To Space Node and add it to a Subgraph.
///
/// The Depth To Space Node rearranges data from depth into blocks of spatial data (a reverse transform to
/// Space To Depth). For a given input pixel, an output square of pixels with side @a block_size is formed from values
/// in the corresponding number of its channels. The output depth is therefore @a block_size x @a block_size times
/// smaller than that of the input.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
///                   with [N, IH, IW, OC * block_size * block_size] dimensions.
490 /// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph 491 /// with [N, IH * block_size, IW * block_size, OC] dimensions. 492 /// @param block_size - the size of the spatial block. 493 /// @param flags - binary features of the input_channels Node. No supported flags are currently defined. 494 enum xnn_status xnn_define_depth_to_space( 495 xnn_subgraph_t subgraph, 496 uint32_t input_id, 497 uint32_t output_id, 498 uint32_t block_size, 499 uint32_t flags); 500 501 /// Define a 1D Global Average Pooling Node and add it to a Subgraph. 502 /// 503 /// @param subgraph - a Subgraph object that will own the created Node. 504 /// @param output_min - lower bound for clipping output values. 505 /// @param output_max - upper bound for clipping output values. 506 /// @param input_id - Value ID for the input tensor. The input tensor must be a dense tensor with 2 or more dimensions 507 /// defined in the @a subgraph. Averaging is performed across the second-innermost dimension. 508 /// @param output_id - Value ID for the output tensor. The output tensor must be a dense tensor with 2 or more 509 /// dimensions defined in the @a subgraph. 510 /// @param flags - binary features of the 1D Global Average Pooling Node. No supported flags are currently defined. 511 enum xnn_status xnn_define_global_average_pooling_1d( 512 xnn_subgraph_t subgraph, 513 float output_min, 514 float output_max, 515 uint32_t input_id, 516 uint32_t output_id, 517 uint32_t flags); 518 519 /// Define a 2D Global Average Pooling Node and add it to a Subgraph. 520 /// 521 /// @param subgraph - a Subgraph object that will own the created Node. 522 /// @param output_min - lower bound for clipping output values. 523 /// @param output_max - upper bound for clipping output values. 524 /// @param input_id - Value ID for the input tensor. The input tensor must be a dense tensor with 3 or more dimensions 525 /// defined in the @a subgraph. 
Averaging is performed across the second- and third-innermost 526 /// dimensions. 527 /// @param output_id - Value ID for the output tensor. The output tensor must be a dense tensor with 3 or more 528 /// dimensions defined in the @a subgraph. 529 /// @param flags - binary features of the 2D Global Average Pooling Node. No supported flags are currently defined. 530 enum xnn_status xnn_define_global_average_pooling_2d( 531 xnn_subgraph_t subgraph, 532 float output_min, 533 float output_max, 534 uint32_t input_id, 535 uint32_t output_id, 536 uint32_t flags); 537 538 /// Define a 2D Average Pooling Node and add it to a Subgraph. 539 /// 540 /// @param subgraph - a Subgraph object that will own the created Node. 541 /// @param input_padding_top - implicit zero-padding above 2D input data. Must be 0 if XNN_FLAG_TENSORFLOW_SAME_PADDING 542 /// flag is specified. 543 /// @param input_padding_right - implicit zero-padding to the right of 2D input data. Must be 0 if 544 /// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified. 545 /// @param input_padding_bottom - implicit zero-padding below 2D input data. Must be 0 if 546 /// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified. 547 /// @param input_padding_left - implicit zero-padding to the left of 2D input data. Must be 0 if 548 /// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified. 549 /// @param pooling_height - pooling (kernel) height. 550 /// @param pooling_width - pooling (kernel) width. 551 /// @param stride_height - displacing of the pooling window in the vertical dimension of the input pixels corresponding 552 /// to vertically adjacent output pixels. 553 /// @param stride_width - displacing of the pooling window in the horizontal dimension of the input pixels corresponding 554 /// to horizontally adjacent output pixels. 555 /// @param output_min - lower bound for clipping output values. 556 /// @param output_max - upper bound for clipping output values. 557 /// @param input_id - Value ID for the input tensor. 
The input tensor must be a 4D tensor defined in the @a subgraph 558 /// with [N, IH, IW, channels] dimensions 559 /// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph 560 /// with [N, OH, OW, channels] dimensions. 561 /// @param flags - binary features of the 2D Average Pooling Node. The only currently supported values is 562 /// XNN_FLAG_TENSORFLOW_SAME_PADDING. 563 enum xnn_status xnn_define_average_pooling_2d( 564 xnn_subgraph_t subgraph, 565 uint32_t input_padding_top, 566 uint32_t input_padding_right, 567 uint32_t input_padding_bottom, 568 uint32_t input_padding_left, 569 uint32_t pooling_height, 570 uint32_t pooling_width, 571 uint32_t stride_height, 572 uint32_t stride_width, 573 float output_min, 574 float output_max, 575 uint32_t input_id, 576 uint32_t output_id, 577 uint32_t flags); 578 579 /// Define a Fully Connected Node and add it to a Subgraph. 580 /// 581 /// @param subgraph - a Subgraph object that will own the created Node. 582 /// @param output_min - lower bound for clipping output values. 583 /// @param output_max - upper bound for clipping output values. 584 /// @param input_id - Value ID for the input tensor. The input tensor must be an N-dimensional tensor defined in the 585 /// @a subgraph. If XNN_FLAG_TENSORFLOW_RESHAPE_2D is not specified, the input tensor must be at least 586 /// 1D and its last dimension must match the last dimension of the filter tensor. In particular, if 587 /// input is a 2D tensor, it must have [batch_size, input_channels] dimensions. 588 /// If XNN_FLAG_TENSORFLOW_RESHAPE_2D is specified, the number of elements in the input tensor must be 589 /// divisible by the input_channels. 
The tensor will be first flattened into a 1D tensor of 590 /// [num_input_elements] dimensions, then reshaped into a 2D tensor of 591 /// [num_input_elements / input_channels, input_channels] dimensions where num_input_elements is the 592 /// total number of elements in the input tensor. 593 /// @param filter_id - Value ID for the filter tensor. The filter tensor must a 2D tensor defined in the @a subgraph. 594 /// If the XNN_FLAG_TRANSPOSE_WEIGHTS flag is not specified, the filter tensor must have 595 /// [output_channels, input_channels] dimensions. If the XNN_FLAG_TRANSPOSE_WEIGHTS flag is 596 /// specified, the filter tensor must have [input_channels, output_channels] dimensions. 597 /// @param bias_id - Value ID for the bias tensor, or XNN_INVALID_VALUE_ID for a Fully Connected Node without a bias. 598 /// If present, the bias tensor must be a 1D tensor defined in the @a subgraph with [output_channels] 599 /// dimensions. 600 /// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph. 601 /// If XNN_FLAG_TENSORFLOW_RESHAPE_2D is not specified, the output tensor must have the same 602 /// dimensionality as the input tensor, all its dimensions but the last one must match the 603 /// corresponding dimensions of the input tensor, and the last dimensions of the output tensor must 604 /// match the first dimension of the filter tensor. In particular, if input is a 2D tensor, output 605 /// must be a 2D tensor of [batch_size, output_channels] dimensions. 606 /// If XNN_FLAG_TENSORFLOW_RESHAPE_2D is specified, output must be a 2D tensor of 607 /// [num_input_elements / input_channels, output_channels] dimensions where num_input_elements is the 608 /// total number of elements in the input tensor. 609 /// @param flags - binary features of the Fully Connected Node. The only currently supported values are 610 /// XNN_FLAG_TENSORFLOW_RESHAPE_2D and XNN_FLAG_TRANSPOSE_WEIGHTS. 
611 enum xnn_status xnn_define_fully_connected( 612 xnn_subgraph_t subgraph, 613 float output_min, 614 float output_max, 615 uint32_t input_id, 616 uint32_t filter_id, 617 uint32_t bias_id, 618 uint32_t output_id, 619 uint32_t flags); 620 621 /// Define a 2D Max Pooling Node and add it to a Subgraph. 622 /// 623 /// @param subgraph - a Subgraph object that will own the created Node. 624 /// @param input_padding_top - implicit zero-padding above 2D input data. Must be 0 if XNN_FLAG_TENSORFLOW_SAME_PADDING 625 /// flag is specified. 626 /// @param input_padding_right - implicit zero-padding to the right of 2D input data. Must be 0 if 627 /// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified. 628 /// @param input_padding_bottom - implicit zero-padding below 2D input data. Must be 0 if 629 /// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified. 630 /// @param input_padding_left - implicit zero-padding to the left of 2D input data. Must be 0 if 631 /// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified. 632 /// @param pooling_height - pooling (kernel) height. 633 /// @param pooling_width - pooling (kernel) width. 634 /// @param stride_height - displacing of the pooling window in the vertical dimension of the input pixels corresponding 635 /// to vertically adjacent output pixels. 636 /// @param stride_width - displacing of the pooling window in the horizontal dimension of the input pixels corresponding 637 /// to horizontally adjacent output pixels. 638 /// @param dilation_height - dilation of pooling elements along the height dimension. 639 /// @param dilation_width - dilation of pooling elements along the width dimension. 640 /// @param output_min - lower bound for clipping output values. 641 /// @param output_max - upper bound for clipping output values. 642 /// @param input_id - Value ID for the input tensor. 
The input tensor must be a 4D tensor defined in the @a subgraph
///                   with [N, IH, IW, channels] dimensions.
/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
///                    with [N, OH, OW, channels] dimensions.
/// @param flags - binary features of the 2D Max Pooling Node. The only currently supported value is
///                XNN_FLAG_TENSORFLOW_SAME_PADDING.
The output tensor must 678 /// be a 4D tensor defined in the @a subgraph with [N, OH, OW, channels] dimensions. 679 /// @param output_index_id - Value ID for the output tensor with the indexes of the maximum values in the pools. The 680 /// output tensor must be a 4D tensor defined in the @a subgraph with [N, OH, OW, channels] 681 /// dimensions. 682 /// @param flags - binary features of the 2D ArgMax Pooling Node. No supported flags are currently defined. 683 enum xnn_status xnn_define_argmax_pooling_2d( 684 xnn_subgraph_t subgraph, 685 uint32_t input_padding_top, 686 uint32_t input_padding_right, 687 uint32_t input_padding_bottom, 688 uint32_t input_padding_left, 689 uint32_t pooling_height, 690 uint32_t pooling_width, 691 uint32_t input_id, 692 uint32_t output_value_id, 693 uint32_t output_index_id, 694 uint32_t flags); 695 696 /// Define a 2D UnPooling Node and add it to a Subgraph. 697 /// 698 /// @param subgraph - a Subgraph object that will own the created Node. 699 /// @param padding_top - implicit padding above 2D output data. 700 /// @param padding_right - implicit padding to the right of 2D output data. 701 /// @param padding_bottom - implicit padding below 2D output data. 702 /// @param padding_left - implicit padding to the left of 2D output data. 703 /// @param pooling_height - height of the pooling window. 704 /// @param pooling_width - width of the pooling window. 705 /// @param input_value_id - Value ID for the input tensor with the max-pooling values to invert. The input value tensor 706 /// must be a 4D tensor defined in the @a subgraph with [N, IH, IW, channels] dimensions. 707 /// @param input_index_id - Value ID for the input tensor with the indices of the per-pool maximum values produced by 708 /// a 2D UnPooling Node. The input tensor must be a 4D tensor defined in the @a subgraph with 709 /// [N, IH, IW, channels] dimensions. 710 /// @param output_id - Value ID for the output tensor. 
The output tensor must be a 4D tensor defined in the @a subgraph 711 /// with [N, OH, OW, channels] dimensions. 712 /// @param flags - binary features of the 2D UnPooling Node. No supported flags are currently defined. 713 enum xnn_status xnn_define_unpooling_2d( 714 xnn_subgraph_t subgraph, 715 uint32_t padding_top, 716 uint32_t padding_right, 717 uint32_t padding_bottom, 718 uint32_t padding_left, 719 uint32_t pooling_height, 720 uint32_t pooling_width, 721 uint32_t input_value_id, 722 uint32_t input_index_id, 723 uint32_t output_id, 724 uint32_t flags); 725 726 /// Define a 2-Input Add Node and add it to a Subgraph. 727 /// 728 /// The 2-Input Add Node computes elementwise addition of two tensor inputs with numpy broadcasting rules. 729 /// 730 /// @param subgraph - a Subgraph object that will own the created Node. 731 /// @param output_min - lower bound for clipping output values. 732 /// @param output_max - upper bound for clipping output values. 733 /// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in 734 /// the @a subgraph with each dimension either equal to the corresponding dimension of the second 735 /// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along 736 /// that dimension. 737 /// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in 738 /// the @a subgraph with each dimension either equal to the corresponding dimension of the first 739 /// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along 740 /// that dimension. 741 /// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined 742 /// in the @a subgraph with each dimension equal to the maximum between the corresponding dimension 743 /// of the two inputs. 744 /// @param flags - binary features of the Add Node. 
No supported flags are currently defined. 745 enum xnn_status xnn_define_add2( 746 xnn_subgraph_t subgraph, 747 float output_min, 748 float output_max, 749 uint32_t input1_id, 750 uint32_t input2_id, 751 uint32_t output_id, 752 uint32_t flags); 753 754 /// Define a 2-Input Multiply Node and add it to a Subgraph. 755 /// 756 /// The 2-Input Multiply Node computes elementwise multiplication of two tensor inputs with numpy broadcasting rules. 757 /// 758 /// @param subgraph - a Subgraph object that will own the created Node. 759 /// @param output_min - lower bound for clipping output values. 760 /// @param output_max - upper bound for clipping output values. 761 /// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in 762 /// the @a subgraph with each dimension either equal to the corresponding dimension of the second 763 /// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along 764 /// that dimension. 765 /// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in 766 /// the @a subgraph with each dimension either equal to the corresponding dimension of the first 767 /// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along 768 /// that dimension. 769 /// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined 770 /// in the @a subgraph with each dimension equal to the maximum between the corresponding dimension 771 /// of the two inputs. 772 /// @param flags - binary features of the Multiply Node. No supported flags are currently defined. 773 enum xnn_status xnn_define_multiply2( 774 xnn_subgraph_t subgraph, 775 float output_min, 776 float output_max, 777 uint32_t input1_id, 778 uint32_t input2_id, 779 uint32_t output_id, 780 uint32_t flags); 781 782 /// Define a Subtract Node and add it to a Subgraph. 
783 /// 784 /// The Subtract Node computes elementwise subtraction of two tensor inputs with numpy broadcasting rules. 785 /// 786 /// @param subgraph - a Subgraph object that will own the created Node. 787 /// @param output_min - lower bound for clipping output values. 788 /// @param output_max - upper bound for clipping output values. 789 /// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in 790 /// the @a subgraph with each dimension either equal to the corresponding dimension of the second 791 /// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along 792 /// that dimension. 793 /// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in 794 /// the @a subgraph with each dimension either equal to the corresponding dimension of the first 795 /// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along 796 /// that dimension. 797 /// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined 798 /// in the @a subgraph with each dimension equal to the maximum between the corresponding dimension 799 /// of the two inputs. 800 /// @param flags - binary features of the Subtract Node. No supported flags are currently defined. 801 enum xnn_status xnn_define_subtract( 802 xnn_subgraph_t subgraph, 803 float output_min, 804 float output_max, 805 uint32_t input1_id, 806 uint32_t input2_id, 807 uint32_t output_id, 808 uint32_t flags); 809 810 /// Define a Divide Node and add it to a Subgraph. 811 /// 812 /// The Divide Node computes elementwise division of two tensor inputs with numpy broadcasting rules. 813 /// 814 /// @param subgraph - a Subgraph object that will own the created Node. 815 /// @param output_min - lower bound for clipping output values. 
816 /// @param output_max - upper bound for clipping output values. 817 /// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in 818 /// the @a subgraph with each dimension either equal to the corresponding dimension of the second 819 /// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along 820 /// that dimension. 821 /// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in 822 /// the @a subgraph with each dimension either equal to the corresponding dimension of the first 823 /// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along 824 /// that dimension. 825 /// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined 826 /// in the @a subgraph with each dimension equal to the maximum between the corresponding dimension 827 /// of the two inputs. 828 /// @param flags - binary features of the Divide Node. No supported flags are currently defined. 829 enum xnn_status xnn_define_divide( 830 xnn_subgraph_t subgraph, 831 float output_min, 832 float output_max, 833 uint32_t input1_id, 834 uint32_t input2_id, 835 uint32_t output_id, 836 uint32_t flags); 837 838 /// Define a 2-Input Maximum Node and add it to a Subgraph. 839 /// 840 /// The 2-Input Maximum Node computes elementwise maximum of two tensor inputs with numpy broadcasting rules. 841 /// 842 /// @param subgraph - a Subgraph object that will own the created Node. 843 /// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in 844 /// the @a subgraph with each dimension either equal to the corresponding dimension of the second 845 /// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along 846 /// that dimension. 
847 /// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in 848 /// the @a subgraph with each dimension either equal to the corresponding dimension of the first 849 /// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along 850 /// that dimension. 851 /// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined 852 /// in the @a subgraph with each dimension equal to the maximum between the corresponding dimension 853 /// of the two inputs. 854 /// @param flags - binary features of the Maximum Node. No supported flags are currently defined. 855 enum xnn_status xnn_define_maximum2( 856 xnn_subgraph_t subgraph, 857 uint32_t input1_id, 858 uint32_t input2_id, 859 uint32_t output_id, 860 uint32_t flags); 861 862 /// Define a 2-Input Minimum Node and add it to a Subgraph. 863 /// 864 /// The 2-Input Minimum Node computes elementwise minimum of two tensor inputs with numpy broadcasting rules. 865 /// 866 /// @param subgraph - a Subgraph object that will own the created Node. 867 /// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in 868 /// the @a subgraph with each dimension either equal to the corresponding dimension of the second 869 /// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along 870 /// that dimension. 871 /// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in 872 /// the @a subgraph with each dimension either equal to the corresponding dimension of the first 873 /// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along 874 /// that dimension. 875 /// @param output_id - Value ID for the output tensor. 
The output tensor must be a max(N,M)-dimensional tensor defined 876 /// in the @a subgraph with each dimension equal to the maximum between the corresponding dimension 877 /// of the two inputs. 878 /// @param flags - binary features of the Minimum Node. No supported flags are currently defined. 879 enum xnn_status xnn_define_minimum2( 880 xnn_subgraph_t subgraph, 881 uint32_t input1_id, 882 uint32_t input2_id, 883 uint32_t output_id, 884 uint32_t flags); 885 886 /// Define a Squared Difference Node and add it to a Subgraph. 887 /// 888 /// The Squared Difference Node computes elementwise squared difference of two tensor inputs with numpy broadcasting 889 /// rules. 890 /// 891 /// @param subgraph - a Subgraph object that will own the created Node. 892 /// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in 893 /// the @a subgraph with each dimension either equal to the corresponding dimension of the second 894 /// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along 895 /// that dimension. 896 /// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in 897 /// the @a subgraph with each dimension either equal to the corresponding dimension of the first 898 /// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along 899 /// that dimension. 900 /// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined 901 /// in the @a subgraph with each dimension equal to the maximum between the corresponding dimension 902 /// of the two inputs. 903 /// @param flags - binary features of the Squared Difference Node. No supported flags are currently defined. 
904 enum xnn_status xnn_define_squared_difference( 905 xnn_subgraph_t subgraph, 906 uint32_t input1_id, 907 uint32_t input2_id, 908 uint32_t output_id, 909 uint32_t flags); 910 911 /// Define a Constant Pad Node with static padding specification and add it to a Subgraph. 912 /// 913 /// @param subgraph - a Subgraph object that will own the created Node. 914 /// @param pre_paddings - number of padding elements to insert before input elements for every dimension. This array 915 /// must have as many elements as the the number of dimensions in the input tensor. 916 /// @param post_paddings - number of padding elements to insert after input elements for every dimension. This array 917 /// must have as many elements as the the number of dimensions in the input tensor. 918 /// @param padding_value - constant value used to initialize padding elements. 919 /// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph. 920 /// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its 921 /// shape must match the shape of the input tensor with padding. 922 /// @param flags - binary features of the Constant Pad Node. No supported flags are currently defined. 923 enum xnn_status xnn_define_static_constant_pad( 924 xnn_subgraph_t subgraph, 925 const size_t* pre_paddings, 926 const size_t* post_paddings, 927 float padding_value, 928 uint32_t input_id, 929 uint32_t output_id, 930 uint32_t flags); 931 932 /// Define a 2-Input Concatenate Node and add it to a Subgraph. 933 /// 934 /// The 2-Input Concatenate Node concatenates two tensors along a specified axis. 935 /// 936 /// @param subgraph - a Subgraph object that will own the created Node. 937 /// @param axis - the axis to concatenate the two input tensors along 938 /// @param input1_id - Value ID for the first input tensor. 
The input tensor must be an N-dimensional tensor defined in 939 /// the @a subgraph with each dimension, except the axis, equal to the corresponding dimension of the 940 /// second input. 941 /// @param input2_id - Value ID for the second input tensor. The input tensor must be an N-dimensional tensor defined in 942 /// the @a subgraph with each dimension, except the axis, equal to the corresponding dimension of the 943 /// first input. 944 /// @param output_id - Value ID for the output tensor. The output tensor must be a N-dimensional tensor defined 945 /// in the @a subgraph with each dimension equal to the dimension of both inputs, except the axis 946 /// dimension, where it is the sum of the corresponding dimensions of both inputs. 947 /// @param flags - binary features of the Concatenate Node. No supported flags are currently defined. 948 enum xnn_status xnn_define_concatenate2( 949 xnn_subgraph_t subgraph, 950 size_t axis, 951 uint32_t input1_id, 952 uint32_t input2_id, 953 uint32_t output_id, 954 uint32_t flags); 955 956 /// Define a 3-Input Concatenate Node and add it to a Subgraph. 957 /// 958 /// The 3-Input Concatenate Node concatenates three tensors along a specified axis. 959 /// 960 /// @param subgraph - a Subgraph object that will own the created Node. 961 /// @param axis - the axis to concatenate the three input tensors along 962 /// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in 963 /// the @a subgraph with each dimension, except the axis, equal to the corresponding dimension of the 964 /// other inputs. 965 /// @param input2_id - Value ID for the second input tensor. The input tensor must be an N-dimensional tensor defined in 966 /// the @a subgraph with each dimension, except the axis, equal to the corresponding dimension of the 967 /// other inputs. 968 /// @param input3_id - Value ID for the third input tensor. 
The input tensor must be an N-dimensional tensor defined in 969 /// the @a subgraph with each dimension, except the axis, equal to the corresponding dimension of the 970 /// other inputs. 971 /// @param output_id - Value ID for the output tensor. The output tensor must be a N-dimensional tensor defined 972 /// in the @a subgraph with each dimension equal to the dimension of all inputs, except the axis 973 /// dimension, where it is the sum of the corresponding dimensions of all inputs. 974 /// @param flags - binary features of the Concatenate Node. No supported flags are currently defined. 975 enum xnn_status xnn_define_concatenate3( 976 xnn_subgraph_t subgraph, 977 size_t axis, 978 uint32_t input1_id, 979 uint32_t input2_id, 980 uint32_t input3_id, 981 uint32_t output_id, 982 uint32_t flags); 983 984 /// Define a 4-Input Concatenate Node and add it to a Subgraph. 985 /// 986 /// The 4-Input Concatenate Node concatenates four tensors along a specified axis. 987 /// 988 /// @param subgraph - a Subgraph object that will own the created Node. 989 /// @param axis - the axis to concatenate the four input tensors along 990 /// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in 991 /// the @a subgraph with each dimension, except the axis, equal to the corresponding dimension of the 992 /// other inputs. 993 /// @param input2_id - Value ID for the second input tensor. The input tensor must be an N-dimensional tensor defined in 994 /// the @a subgraph with each dimension, except the axis, equal to the corresponding dimension of the 995 /// other inputs. 996 /// @param input3_id - Value ID for the third input tensor. The input tensor must be an N-dimensional tensor defined in 997 /// the @a subgraph with each dimension, except the axis, equal to the corresponding dimension of the 998 /// other inputs. 999 /// @param input4_id - Value ID for the fourth input tensor. 
The input tensor must be an N-dimensional tensor defined in 1000 /// the @a subgraph with each dimension, except the axis, equal to the corresponding dimension of the 1001 /// other inputs. 1002 /// @param output_id - Value ID for the output tensor. The output tensor must be a N-dimensional tensor defined 1003 /// in the @a subgraph with each dimension equal to the dimension of all inputs, except the axis 1004 /// dimension, where it is the sum of the corresponding dimensions of all inputs. 1005 /// @param flags - binary features of the Concatenate Node. No supported flags are currently defined. 1006 enum xnn_status xnn_define_concatenate4( 1007 xnn_subgraph_t subgraph, 1008 size_t axis, 1009 uint32_t input1_id, 1010 uint32_t input2_id, 1011 uint32_t input3_id, 1012 uint32_t input4_id, 1013 uint32_t output_id, 1014 uint32_t flags); 1015 1016 /// Define a 2-Output Split Node and add it to a Subgraph. 1017 /// 1018 /// The 2-Output Split Node splits an input tensor into two output tensors along a specified axis evenly. 1019 /// 1020 /// @param subgraph - a Subgraph object that will own the created Node. 1021 /// @param split_dim - the dimension to split the input tensor along 1022 /// @param input_id - Value ID for the input tensor. The input tensor must be an N-dimensional tensor defined in the @a 1023 /// subgraph. 1024 /// @param output1_id - Value ID for the first output tensor. The output tensor must be an N-dimensional tensor defined 1025 /// in the @a subgraph with each dimension, except the axis, equal to the corresponding dimension 1026 /// of the second output. The split_dim dimension is half of the input's split_dim. 1027 /// @param output2_id - Value ID for the second output tensor. The output tensor must be an N-dimensional tensor 1028 /// defined in the @a subgraph with each dimension, except the axis, equal to the corresponding 1029 /// dimension of the first output. The split_dim dimension is half of the input's split_dim. 
1030 /// @param flags - binary features of the Split Node. No supported flags are currently defined. 1031 enum xnn_status xnn_define_even_split2( 1032 xnn_subgraph_t subgraph, 1033 size_t split_dim, 1034 uint32_t input_id, 1035 uint32_t output1_id, 1036 uint32_t output2_id, 1037 uint32_t flags); 1038 1039 /// Define a 3-Output Split Node and add it to a Subgraph. 1040 /// 1041 /// The 3-Output Split Node splits an input tensor into three output tensors along a specified axis evenly. 1042 /// 1043 /// @param subgraph - a Subgraph object that will own the created Node. 1044 /// @param split_dim - the dimension to split the input tensor along 1045 /// @param input_id - Value ID for the input tensor. The input tensor must be an N-dimensional tensor defined in the @a 1046 /// subgraph. 1047 /// @param output1_id - Value ID for the first output tensor. The output tensor must be an N-dimensional tensor defined 1048 /// in the @a subgraph with each dimension, except the axis, equal to the corresponding dimension 1049 /// of the second and third output. The split_dim dimension is one third of the input's split_dim. 1050 /// @param output2_id - Value ID for the second output tensor. The output tensor must be an N-dimensional tensor 1051 /// defined in the @a subgraph with each dimension, except the axis, equal to the corresponding 1052 /// dimension of the first and third output. The split_dim dimension is one third of the input's 1053 /// split_dim. 1054 /// @param output3_id - Value ID for the third output tensor. The output tensor must be an N-dimensional tensor 1055 /// defined in the @a subgraph with each dimension, except the axis, equal to the corresponding 1056 /// dimension of the second and third output. The split_dim dimension is one third of the input's 1057 /// split_dim. 1058 /// @param flags - binary features of the Split Node. No supported flags are currently defined. 
1059 enum xnn_status xnn_define_even_split3( 1060 xnn_subgraph_t subgraph, 1061 size_t split_dim, 1062 uint32_t input_id, 1063 uint32_t output1_id, 1064 uint32_t output2_id, 1065 uint32_t output3_id, 1066 uint32_t flags); 1067 1068 /// Define a 4-Output Split Node and add it to a Subgraph. 1069 /// 1070 /// The 4-Output Split Node splits an input tensor into four output tensors along a specified axis evenly. 1071 /// 1072 /// @param subgraph - a Subgraph object that will own the created Node. 1073 /// @param split_dim - the dimension to split the input tensor along 1074 /// @param input_id - Value ID for the input tensor. The input tensor must be an N-dimensional tensor defined in the @a 1075 /// subgraph. 1076 /// @param output1_id - Value ID for the first output tensor. The output tensor must be an N-dimensional tensor defined 1077 /// in the @a subgraph with each dimension, except the axis, equal to the corresponding dimension 1078 /// of the other output tensors. The split_dim dimension is one fourth of the input's split_dim. 1079 /// @param output2_id - Value ID for the second output tensor. The output tensor must be an N-dimensional tensor 1080 /// defined in the @a subgraph with each dimension, except the axis, equal to the corresponding 1081 /// dimension of the other output tensors. The split_dim dimension is one fourth of the input's 1082 /// split_dim. 1083 /// @param output3_id - Value ID for the third output tensor. The output tensor must be an N-dimensional tensor 1084 /// defined in the @a subgraph with each dimension, except the axis, equal to the corresponding 1085 /// dimension of the other output tensors. The split_dim dimension is one fourth of the input's 1086 /// split_dim. 1087 /// @param output4_id - Value ID for the fourth output tensor. The output tensor must be an N-dimensional tensor 1088 /// defined in the @a subgraph with each dimension, except the axis, equal to the corresponding 1089 /// dimension of the other output tensors. 
The split_dim dimension is one fourth of the input's 1090 /// split_dim. 1091 /// @param flags - binary features of the Split Node. No supported flags are currently defined. 1092 enum xnn_status xnn_define_even_split4( 1093 xnn_subgraph_t subgraph, 1094 size_t split_dim, 1095 uint32_t input_id, 1096 uint32_t output1_id, 1097 uint32_t output2_id, 1098 uint32_t output3_id, 1099 uint32_t output4_id, 1100 uint32_t flags); 1101 1102 /// Define a Reshape Node with static shape specification and add it to a Subgraph. 1103 /// 1104 /// @param subgraph - a Subgraph object that will own the created Node. 1105 /// @param num_dims - number of shape dimensions in the output tensor. 1106 /// @param new_shape - shape dimensions of the output tensor. 1107 /// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph. 1108 /// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its 1109 /// shape must match the shape of the input tensor with padding. 1110 /// @param flags - binary features of the Reshape Node. No supported flags are currently defined. 1111 enum xnn_status xnn_define_static_reshape( 1112 xnn_subgraph_t subgraph, 1113 size_t num_dims, 1114 const size_t* new_shape, 1115 uint32_t input_id, 1116 uint32_t output_id, 1117 uint32_t flags); 1118 1119 /// Define a 2D Resize Bilinear Node with static output height & width specification and add it to a Subgraph. 1120 /// 1121 /// @param subgraph - a Subgraph object that will own the created Node. 1122 /// @param new_height - height dimension of the output tensor. 1123 /// @param new_width - width dimension of the output tensor. 1124 /// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph 1125 /// with [N, H, W, C] dimensions. 1126 /// @param output_id - Value ID for the output tensor. 
The output tensor must be a 4D tensor defined in the @a subgraph 1127 /// with [N, new_height, new_width, C] dimensions. 1128 /// @param flags - binary features of the 2D Resize Bilinear Node. The only currently supported values are 1129 /// XNN_FLAG_TENSORFLOW_LEGACY_MODE and XNN_FLAG_ALIGN_CORNERS, which are mutually exclusive. 1130 enum xnn_status xnn_define_static_resize_bilinear_2d( 1131 xnn_subgraph_t subgraph, 1132 size_t new_height, 1133 size_t new_width, 1134 uint32_t input_id, 1135 uint32_t output_id, 1136 uint32_t flags); 1137 1138 /// Define a PReLU (Parametric ReLU) Node and add it to a Subgraph. 1139 /// 1140 /// @param subgraph - a Subgraph object that will own the created Node. 1141 /// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph 1142 /// with [N, H, W, channels] dimensions. 1143 /// @param slope_id - Value ID for the bias tensor. The bias tensor must be a 1D tensor defined in the @a subgraph with 1144 /// [channels] dimensions. 1145 /// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph 1146 /// with [N, H, W, channels] dimensions. 1147 /// @param flags - binary features of the PReLU Node. No supported flags are currently defined. 1148 enum xnn_status xnn_define_prelu( 1149 xnn_subgraph_t subgraph, 1150 uint32_t input_id, 1151 uint32_t slope_id, 1152 uint32_t output_id, 1153 uint32_t flags); 1154 1155 /// Define a Abs Node and add it to a Subgraph. 1156 /// 1157 /// @param subgraph - a Subgraph object that will own the created Node. 1158 /// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph. 1159 /// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its 1160 /// shape must match the shape of the input tensor. 1161 /// @param flags - binary features of the Abs Node. No supported flags are currently defined. 
1162 enum xnn_status xnn_define_abs( 1163 xnn_subgraph_t subgraph, 1164 uint32_t input_id, 1165 uint32_t output_id, 1166 uint32_t flags); 1167 1168 /// Define a Bankers' Rounding Node and add it to a Subgraph. 1169 /// 1170 /// @param subgraph - a Subgraph object that will own the created Node. 1171 /// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph. 1172 /// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its 1173 /// shape must match the shape of the input tensor. 1174 /// @param flags - binary features of the Bankers' Rounding Node. No supported flags are currently defined. 1175 enum xnn_status xnn_define_bankers_rounding( 1176 xnn_subgraph_t subgraph, 1177 uint32_t input_id, 1178 uint32_t output_id, 1179 uint32_t flags); 1180 1181 /// Define a Ceiling Node and add it to a Subgraph. 1182 /// 1183 /// @param subgraph - a Subgraph object that will own the created Node. 1184 /// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph. 1185 /// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its 1186 /// shape must match the shape of the input tensor. 1187 /// @param flags - binary features of the Ceiling Node. No supported flags are currently defined. 1188 enum xnn_status xnn_define_ceiling( 1189 xnn_subgraph_t subgraph, 1190 uint32_t input_id, 1191 uint32_t output_id, 1192 uint32_t flags); 1193 1194 /// Define a Clamp Node and add it to a Subgraph. 1195 /// 1196 /// @param subgraph - a Subgraph object that will own the created Node. 1197 /// @param output_min - lower bound for clipping output values. 1198 /// @param output_max - upper bound for clipping output values. 1199 /// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph. 1200 /// @param output_id - Value ID for the output tensor. 
The output tensor must be defined in the @a subgraph, and its 1201 /// shape must match the shape of the input tensor. 1202 /// @param flags - binary features of the Clamp Node. No supported flags are currently defined. 1203 enum xnn_status xnn_define_clamp( 1204 xnn_subgraph_t subgraph, 1205 float output_min, 1206 float output_max, 1207 uint32_t input_id, 1208 uint32_t output_id, 1209 uint32_t flags); 1210 1211 /// Define an ELU (Exponential Linear Unit) Node and add it to a Subgraph. 1212 /// 1213 /// @param subgraph - a Subgraph object that will own the created Node. 1214 /// @param alpha - scale factor for negative output elements. 1215 /// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph. 1216 /// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its 1217 /// shape must match the shape of the input tensor. 1218 /// @param flags - binary features of the ELU Node. No supported flags are currently defined. 1219 enum xnn_status xnn_define_elu( 1220 xnn_subgraph_t subgraph, 1221 float alpha, 1222 uint32_t input_id, 1223 uint32_t output_id, 1224 uint32_t flags); 1225 1226 /// Define a Floor Node and add it to a Subgraph. 1227 /// 1228 /// @param subgraph - a Subgraph object that will own the created Node. 1229 /// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph. 1230 /// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its 1231 /// shape must match the shape of the input tensor. 1232 /// @param flags - binary features of the Floor Node. No supported flags are currently defined. 1233 enum xnn_status xnn_define_floor( 1234 xnn_subgraph_t subgraph, 1235 uint32_t input_id, 1236 uint32_t output_id, 1237 uint32_t flags); 1238 1239 /// Define a HardSwish Node and add it to a Subgraph. 
1240 /// 1241 /// @param subgraph - a Subgraph object that will own the created Node. 1242 /// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph. 1243 /// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its 1244 /// shape must match the shape of the input tensor. 1245 /// @param flags - binary features of the HardSwish Node. No supported flags are currently defined. 1246 enum xnn_status xnn_define_hardswish( 1247 xnn_subgraph_t subgraph, 1248 uint32_t input_id, 1249 uint32_t output_id, 1250 uint32_t flags); 1251 1252 /// Define a Leaky ReLU Node and add it to a Subgraph. 1253 /// 1254 /// @param subgraph - a Subgraph object that will own the created Node. 1255 /// @param negative_slope - scale factor for negative input elements. 1256 /// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph. 1257 /// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its 1258 /// shape must match the shape of the input tensor. 1259 /// @param flags - binary features of the Leaky ReLU Node. No supported flags are currently defined. 1260 enum xnn_status xnn_define_leaky_relu( 1261 xnn_subgraph_t subgraph, 1262 float negative_slope, 1263 uint32_t input_id, 1264 uint32_t output_id, 1265 uint32_t flags); 1266 1267 /// Define a Negate Node and add it to a Subgraph. 1268 /// 1269 /// @param subgraph - a Subgraph object that will own the created Node. 1270 /// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph. 1271 /// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its 1272 /// shape must match the shape of the input tensor. 1273 /// @param flags - binary features of the Negate Node. No supported flags are currently defined. 
1274 enum xnn_status xnn_define_negate( 1275 xnn_subgraph_t subgraph, 1276 uint32_t input_id, 1277 uint32_t output_id, 1278 uint32_t flags); 1279 1280 /// Define a Sigmoid Node and add it to a Subgraph. 1281 /// 1282 /// @param subgraph - a Subgraph object that will own the created Node. 1283 /// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph. 1284 /// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its 1285 /// shape must match the shape of the input tensor. 1286 /// @param flags - binary features of the Sigmoid Node. No supported flags are currently defined. 1287 enum xnn_status xnn_define_sigmoid( 1288 xnn_subgraph_t subgraph, 1289 uint32_t input_id, 1290 uint32_t output_id, 1291 uint32_t flags); 1292 1293 /// Define a SoftMax Node and add it to a Subgraph. 1294 /// 1295 /// @param subgraph - a Subgraph object that will own the created Node. 1296 /// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph, and have at 1297 /// least one dimension. 1298 /// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its 1299 /// shape must match the shape of the input tensor. 1300 /// @param flags - binary features of the SoftMax Node. No supported flags are currently defined. 1301 enum xnn_status xnn_define_softmax( 1302 xnn_subgraph_t subgraph, 1303 uint32_t input_id, 1304 uint32_t output_id, 1305 uint32_t flags); 1306 1307 /// Define a Square Node and add it to a Subgraph. 1308 /// 1309 /// @param subgraph - a Subgraph object that will own the created Node. 1310 /// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph. 1311 /// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its 1312 /// shape must match the shape of the input tensor. 
1313 /// @param flags - binary features of the Square Node. No supported flags are currently defined. 1314 enum xnn_status xnn_define_square( 1315 xnn_subgraph_t subgraph, 1316 uint32_t input_id, 1317 uint32_t output_id, 1318 uint32_t flags); 1319 1320 /// Define a Square Root Node and add it to a Subgraph. 1321 /// 1322 /// @param subgraph - a Subgraph object that will own the created Node. 1323 /// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph. 1324 /// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its 1325 /// shape must match the shape of the input tensor. 1326 /// @param flags - binary features of the Square Root Node. No supported flags are currently defined. 1327 enum xnn_status xnn_define_square_root( 1328 xnn_subgraph_t subgraph, 1329 uint32_t input_id, 1330 uint32_t output_id, 1331 uint32_t flags); 1332 1333 /// Define a Static Transpose Node and add it to a Subgraph. 1334 /// 1335 /// The Static Transpose Node applies a generalized transpose to the input tensor using the permuation in perm. 1336 /// 1337 /// @param subgraph - a Subgraph object that will own the created Node. 1338 /// @param input_id - Value ID for the input tensor. The input tensor must be an N-dimensional tensor defined in 1339 /// the @a subgraph. 1340 /// @param output_id - Value ID for the output tensor. The output tensor must be an N-dimensional tensor defined 1341 /// in the @a subgraph with each dimension equal to its corresponding permuted input dimension. 1342 /// @param num_dims - the number of permutation dimensions. This must be equal to the number of input dimensions. 1343 /// @param perm - The permutation of the axis of the input tensor. The perm array must must contain 0 to N-1 in the 1344 /// permuted order. 1345 /// @param flags - binary features of the Static Transpose Node. No supported flags are currently defined. 
1346 enum xnn_status xnn_define_static_transpose( 1347 xnn_subgraph_t subgraph, 1348 size_t num_dims, 1349 const size_t* perm, 1350 uint32_t input_id, 1351 uint32_t output_id, 1352 uint32_t flags); 1353 1354 /// Weights cache is a cache for packed weights. It can be reused between runtimes. 1355 typedef struct xnn_weights_cache* xnn_weights_cache_t; 1356 1357 enum xnn_status xnn_create_weights_cache(xnn_weights_cache_t* weights_cache_out); 1358 1359 /// Create a weights cache object specifying the initial size of weights cache (in bytes). 1360 /// @size - initial capacity of the weights cache (in bytes), i.e. it can hold size bytes without growing. 1361 /// @param weights_cache_out - pointer to the variable that will be initialized to a handle to the weights cache object 1362 /// upon successful return. Once created, the weights cache object can be shared between 1363 /// different Runtime objects. 1364 enum xnn_status xnn_create_weights_cache_with_size(size_t size, xnn_weights_cache_t* weights_cache_out); 1365 1366 1367 /// Weights cache can be finalized in these ways: 1368 enum xnn_weights_cache_finalization_kind { 1369 /// Weights cache is finalized, no insert operations into the weights cache is allowed, even if the "inserted" 1370 /// weights already exist in thee cache. Weights cache memory will also be trimmed to page boundary and set to 1371 /// read-only (to prevent writes). 1372 xnn_weights_cache_finalization_kind_hard, 1373 /// Weights cache will be finalized with some extra space at the end, this allows for "inserting" into the cache only 1374 /// if the weights are already in the cache, and errors on inserting uncached weights. There is memory overhead. 1375 xnn_weights_cache_finalization_kind_soft, 1376 }; 1377 1378 /// Finalizes the weights cache. The kind of finalization is specified by `finalization_kind`. 1379 /// @param weights_cache - the weights cache object to finalize. 1380 /// @param finalization_kind - the kind of finalization. 
1381 enum xnn_status xnn_finalize_weights_cache( 1382 xnn_weights_cache_t weights_cache, 1383 enum xnn_weights_cache_finalization_kind finalization_kind); 1384 1385 /// Destroy a weights cache object, as well as memory used for the cache. 1386 /// @param weights_cache - the weights cache object to destroy. 1387 enum xnn_status xnn_delete_weights_cache(xnn_weights_cache_t weights_cache); 1388 1389 typedef struct xnn_workspace* xnn_workspace_t; 1390 1391 /// Create a workspace object. 1392 /// @param workspace_out - pointer to the variable that will be initialized to a handle to the workspace object upon 1393 /// successful return. Once created, the workspace can be shared between different Runtime 1394 /// objects. 1395 enum xnn_status xnn_create_workspace(xnn_workspace_t* workspace_out); 1396 /// Destroy a workspace object, as well as memory used by the workspace. Object destruction can be deferred until all 1397 /// Runtime objects created with this workspace are destroyed. 1398 /// @param workspace - the workspace object to destroy. 1399 enum xnn_status xnn_release_workspace(xnn_workspace_t workspace); 1400 1401 /// Runtime is a combination of an execution plan for subgraph Nodes and a memory manager for subgraph Values. 1402 typedef struct xnn_runtime* xnn_runtime_t; 1403 1404 enum xnn_profile_info { 1405 /// Returns a size_t containing the number of operators. 1406 xnn_profile_info_num_operators, 1407 /// Returns a char[] containing the null character separated names of all operators. 1408 xnn_profile_info_operator_name, 1409 /// Returns a uint64_t[] with the runtimes of all operators in the same order as xnn_profile_info_operator_name. 1410 xnn_profile_info_operator_timing, 1411 }; 1412 1413 /// Return profile information for all operators. 1414 /// 1415 /// @param runtime - a Runtime object created with @ref xnn_create_runtime, @ref xnn_create_runtime_v2 or 1416 /// @ref xnn_create_runtime_v3. 1417 /// @param param_name - type of profile information required. 
1418 /// @param param_value_size - the size in bytes of memory pointed to by param_value. If this is not sufficient then 1419 /// param_value_size_ret will be set to the required size and xnn_status_out_of_memory will be 1420 /// returned. 1421 /// @param param_value - a pointer to memory location where appropriate values for a given param_value will be written. 1422 /// @param param_value_size_ret - returns number of bytes required to write the result if param_value_size is not 1423 /// sufficient. 1424 enum xnn_status xnn_get_runtime_profiling_info(xnn_runtime_t runtime, 1425 enum xnn_profile_info param_name, 1426 size_t param_value_size, 1427 void* param_value, 1428 size_t* param_value_size_ret); 1429 1430 /// Create a Runtime object from a subgraph. 1431 /// 1432 /// @param subgraph - a Subgraph object with all Values and Nodes that would be handled by the runtime. No Values or 1433 /// Nodes can be added to the runtime once it is constructed. 1434 /// @param weights_cache - a cache for packed weights. The runtime will look up and reuse packed weights in this cache, 1435 /// this will reduce memory allocated for packed weights. 1436 /// @param workspace - a workspace to hold internal tensors. The runtime will allocate space used for internal tensors 1437 /// and track them using workspace. Workspace can be shared and reused across different runtimes. If 1438 /// workspace is NULL, there will be no sharing: each runtime has its own workspace. 1439 /// @param threadpool - the thread pool to be used for parallelisation of computations in the runtime. If the thread 1440 /// pool is NULL, the computation would run on the caller thread without parallelization. 1441 /// @param flags - binary features of the runtime. The only currently supported values are 1442 /// XNN_FLAG_HINT_SPARSE_INFERENCE, XNN_FLAG_HINT_FP16_INFERENCE, XNN_FLAG_FORCE_FP16_INFERENCE, and 1443 /// XNN_FLAG_YIELD_WORKERS. 
If XNN_FLAG_YIELD_WORKERS is specified, worker threads would be yielded to 1444 /// the system scheduler after processing the last operator in the Runtime. 1445 /// @param runtime_out - pointer to the variable that will be initialized with a handle to the Runtime object upon 1446 /// successful return. Once constructed, the Runtime object is independent of the Subgraph object 1447 /// used to create it. 1448 enum xnn_status xnn_create_runtime_v4( 1449 xnn_subgraph_t subgraph, 1450 xnn_weights_cache_t weights_cache, 1451 xnn_workspace_t workspace, 1452 pthreadpool_t threadpool, 1453 uint32_t flags, 1454 xnn_runtime_t* runtime_out); 1455 1456 enum xnn_status xnn_create_runtime_v3( 1457 xnn_subgraph_t subgraph, 1458 xnn_weights_cache_t weights_cache, 1459 pthreadpool_t threadpool, 1460 uint32_t flags, 1461 xnn_runtime_t* runtime_out); 1462 1463 enum xnn_status xnn_create_runtime_v2( 1464 xnn_subgraph_t subgraph, 1465 pthreadpool_t threadpool, 1466 uint32_t flags, 1467 xnn_runtime_t* runtime_out); 1468 1469 enum xnn_status xnn_create_runtime( 1470 xnn_subgraph_t subgraph, 1471 xnn_runtime_t* runtime_out); 1472 1473 struct xnn_external_value { 1474 uint32_t id; 1475 void* data; 1476 }; 1477 1478 /// Setup data pointers for external inputs and outputs in a Runtime object. 1479 /// 1480 /// @param runtime - a Runtime object created with @ref xnn_create_runtime or @ref xnn_create_runtime_v2. 1481 /// @param num_external_values - the number of external inputs and outputs specified in this call. This number must 1482 /// match the number of external inputs and outputs in the runtime, i.e. all external 1483 /// inputs and outputs in the runtime must be specified in one call. 1484 /// @param external_values - array with location information for all external inputs and outputs in the runtime. 
1485 enum xnn_status xnn_setup_runtime( 1486 xnn_runtime_t runtime, 1487 size_t num_external_values, 1488 const struct xnn_external_value* external_values); 1489 1490 /// Execute forward pass for all operators in the runtime. 1491 /// 1492 /// @param runtime - the Runtime object with the execution plan to invoke. 1493 enum xnn_status xnn_invoke_runtime( 1494 xnn_runtime_t runtime); 1495 1496 /// Destroy a Runtime object, as well as operators and memory associated with it. 1497 /// 1498 /// @param runtime - the Runtime object to destroy. 1499 enum xnn_status xnn_delete_runtime( 1500 xnn_runtime_t runtime); 1501 1502 typedef struct xnn_operator* xnn_operator_t; 1503 1504 enum xnn_status xnn_run_operator( 1505 xnn_operator_t op, 1506 pthreadpool_t threadpool); 1507 1508 enum xnn_status xnn_delete_operator( 1509 xnn_operator_t op); 1510 1511 #ifndef XNN_NO_F32_OPERATORS 1512 1513 enum xnn_status xnn_create_abs_nc_f32( 1514 size_t channels, 1515 size_t input_stride, 1516 size_t output_stride, 1517 uint32_t flags, 1518 xnn_operator_t* abs_op_out); 1519 1520 enum xnn_status xnn_setup_abs_nc_f32( 1521 xnn_operator_t abs_op, 1522 size_t batch_size, 1523 const float* input, 1524 float* output, 1525 pthreadpool_t threadpool); 1526 1527 enum xnn_status xnn_create_add_nd_f32( 1528 float output_min, 1529 float output_max, 1530 uint32_t flags, 1531 xnn_operator_t* add_op_out); 1532 1533 enum xnn_status xnn_setup_add_nd_f32( 1534 xnn_operator_t add_op, 1535 size_t num_input1_dims, 1536 const size_t* input1_shape, 1537 size_t num_input2_dims, 1538 const size_t* input2_shape, 1539 const float* input1, 1540 const float* input2, 1541 float* output, 1542 pthreadpool_t threadpool); 1543 1544 enum xnn_status xnn_create_argmax_pooling2d_nhwc_f32( 1545 uint32_t input_padding_top, 1546 uint32_t input_padding_right, 1547 uint32_t input_padding_bottom, 1548 uint32_t input_padding_left, 1549 uint32_t pooling_height, 1550 uint32_t pooling_width, 1551 size_t channels, 1552 size_t 
input_pixel_stride, 1553 size_t output_pixel_stride, 1554 uint32_t flags, 1555 xnn_operator_t* argmax_pooling_op_out); 1556 1557 enum xnn_status xnn_setup_argmax_pooling2d_nhwc_f32( 1558 xnn_operator_t argmax_pooling_op, 1559 size_t batch_size, 1560 size_t input_height, 1561 size_t input_width, 1562 const float* input, 1563 float* output, 1564 uint32_t* index, 1565 pthreadpool_t threadpool); 1566 1567 enum xnn_status xnn_create_average_pooling2d_nhwc_f32( 1568 uint32_t input_padding_top, 1569 uint32_t input_padding_right, 1570 uint32_t input_padding_bottom, 1571 uint32_t input_padding_left, 1572 uint32_t pooling_height, 1573 uint32_t pooling_width, 1574 uint32_t stride_height, 1575 uint32_t stride_width, 1576 size_t channels, 1577 size_t input_pixel_stride, 1578 size_t output_pixel_stride, 1579 float output_min, 1580 float output_max, 1581 uint32_t flags, 1582 xnn_operator_t* average_pooling_op_out); 1583 1584 enum xnn_status xnn_setup_average_pooling2d_nhwc_f32( 1585 xnn_operator_t average_pooling_op, 1586 size_t batch_size, 1587 size_t input_height, 1588 size_t input_width, 1589 const float* input, 1590 float* output, 1591 pthreadpool_t threadpool); 1592 1593 enum xnn_status xnn_create_bankers_rounding_nc_f32( 1594 size_t channels, 1595 size_t input_stride, 1596 size_t output_stride, 1597 uint32_t flags, 1598 xnn_operator_t* rounding_op_out); 1599 1600 enum xnn_status xnn_setup_bankers_rounding_nc_f32( 1601 xnn_operator_t rounding_op, 1602 size_t batch_size, 1603 const float* input, 1604 float* output, 1605 pthreadpool_t threadpool); 1606 1607 enum xnn_status xnn_create_ceiling_nc_f32( 1608 size_t channels, 1609 size_t input_stride, 1610 size_t output_stride, 1611 uint32_t flags, 1612 xnn_operator_t* ceiling_op_out); 1613 1614 enum xnn_status xnn_setup_ceiling_nc_f32( 1615 xnn_operator_t ceiling_op, 1616 size_t batch_size, 1617 const float* input, 1618 float* output, 1619 pthreadpool_t threadpool); 1620 1621 enum xnn_status xnn_create_clamp_nc_f32( 1622 size_t 
channels, 1623 size_t input_stride, 1624 size_t output_stride, 1625 float output_min, 1626 float output_max, 1627 uint32_t flags, 1628 xnn_operator_t* clamp_op_out); 1629 1630 enum xnn_status xnn_setup_clamp_nc_f32( 1631 xnn_operator_t clamp_op, 1632 size_t batch_size, 1633 const float* input, 1634 float* output, 1635 pthreadpool_t threadpool); 1636 1637 typedef const struct xnn_caches* xnn_caches_t; 1638 1639 enum xnn_status xnn_create_convolution2d_nhwc_f32( 1640 uint32_t input_padding_top, 1641 uint32_t input_padding_right, 1642 uint32_t input_padding_bottom, 1643 uint32_t input_padding_left, 1644 uint32_t kernel_height, 1645 uint32_t kernel_width, 1646 uint32_t subsampling_height, 1647 uint32_t subsampling_width, 1648 uint32_t dilation_height, 1649 uint32_t dilation_width, 1650 uint32_t groups, 1651 size_t group_input_channels, 1652 size_t group_output_channels, 1653 size_t input_channel_stride, 1654 size_t output_channel_stride, 1655 const float* kernel, 1656 const float* bias, 1657 float output_min, 1658 float output_max, 1659 uint32_t flags, 1660 xnn_caches_t caches, 1661 xnn_operator_t* convolution_op_out); 1662 1663 // Forward declare. 1664 struct xnn_post_operation; 1665 1666 /// Create a convolution operator with a number of post operations. The 1667 /// convolution operator created using this function does not have output_min 1668 /// and output_max. The list of operators in post_operations will be applied in 1669 /// order. Convolution with post operations is only supported on JIT platforms 1670 /// and when JIT is enabled. 
1671 enum xnn_status xnn_create_fused_convolution2d_nhwc_f32( 1672 uint32_t input_padding_top, 1673 uint32_t input_padding_right, 1674 uint32_t input_padding_bottom, 1675 uint32_t input_padding_left, 1676 uint32_t kernel_height, 1677 uint32_t kernel_width, 1678 uint32_t subsampling_height, 1679 uint32_t subsampling_width, 1680 uint32_t dilation_height, 1681 uint32_t dilation_width, 1682 uint32_t groups, 1683 size_t group_input_channels, 1684 size_t group_output_channels, 1685 size_t input_channel_stride, 1686 size_t output_channel_stride, 1687 const float* kernel, 1688 const float* bias, 1689 size_t num_post_operations, 1690 struct xnn_post_operation* post_operations, 1691 uint32_t flags, 1692 xnn_caches_t caches, 1693 xnn_operator_t* convolution_op_out); 1694 1695 enum xnn_status xnn_setup_convolution2d_nhwc_f32( 1696 xnn_operator_t convolution_op, 1697 size_t batch_size, 1698 size_t input_height, 1699 size_t input_width, 1700 const float* input, 1701 float* output, 1702 pthreadpool_t threadpool); 1703 1704 enum xnn_status xnn_create_deconvolution2d_nhwc_f32( 1705 uint32_t output_padding_top, 1706 uint32_t output_padding_right, 1707 uint32_t output_padding_bottom, 1708 uint32_t output_padding_left, 1709 uint32_t kernel_height, 1710 uint32_t kernel_width, 1711 uint32_t stride_height, 1712 uint32_t stride_width, 1713 uint32_t dilation_height, 1714 uint32_t dilation_width, 1715 uint32_t groups, 1716 size_t group_input_channels, 1717 size_t group_output_channels, 1718 size_t input_pixel_stride, 1719 size_t output_pixel_stride, 1720 const float* kernel, 1721 const float* bias, 1722 float output_min, 1723 float output_max, 1724 uint32_t flags, 1725 xnn_caches_t caches, 1726 xnn_operator_t* deconvolution_op_out); 1727 1728 enum xnn_status xnn_setup_deconvolution2d_nhwc_f32( 1729 xnn_operator_t deconvolution_op, 1730 size_t batch_size, 1731 size_t input_height, 1732 size_t input_width, 1733 uint32_t adjustment_height, 1734 uint32_t adjustment_width, 1735 const float* 
input, 1736 float* output, 1737 pthreadpool_t threadpool); 1738 1739 enum xnn_status xnn_create_divide_nd_f32( 1740 float output_min, 1741 float output_max, 1742 uint32_t flags, 1743 xnn_operator_t* divide_op_out); 1744 1745 enum xnn_status xnn_setup_divide_nd_f32( 1746 xnn_operator_t divide_op, 1747 size_t num_input1_dims, 1748 const size_t* input1_shape, 1749 size_t num_input2_dims, 1750 const size_t* input2_shape, 1751 const float* input1, 1752 const float* input2, 1753 float* output, 1754 pthreadpool_t threadpool); 1755 1756 enum xnn_status xnn_create_elu_nc_f32( 1757 size_t channels, 1758 size_t input_stride, 1759 size_t output_stride, 1760 float alpha, 1761 uint32_t flags, 1762 xnn_operator_t* elu_op_out); 1763 1764 enum xnn_status xnn_setup_elu_nc_f32( 1765 xnn_operator_t elu_op, 1766 size_t batch_size, 1767 const float* input, 1768 float* output, 1769 pthreadpool_t threadpool); 1770 1771 enum xnn_status xnn_create_floor_nc_f32( 1772 size_t channels, 1773 size_t input_stride, 1774 size_t output_stride, 1775 uint32_t flags, 1776 xnn_operator_t* floor_op_out); 1777 1778 enum xnn_status xnn_setup_floor_nc_f32( 1779 xnn_operator_t floor_op, 1780 size_t batch_size, 1781 const float* input, 1782 float* output, 1783 pthreadpool_t threadpool); 1784 1785 enum xnn_status xnn_create_fully_connected_nc_f32( 1786 size_t input_channels, 1787 size_t output_channels, 1788 size_t input_stride, 1789 size_t output_stride, 1790 const float* kernel, 1791 const float* bias, 1792 float output_min, 1793 float output_max, 1794 uint32_t flags, 1795 const xnn_caches_t caches, 1796 xnn_operator_t* fully_connected_op_out); 1797 1798 enum xnn_status xnn_setup_fully_connected_nc_f32( 1799 xnn_operator_t fully_connected_op, 1800 size_t batch_size, 1801 const float* input, 1802 float* output, 1803 pthreadpool_t threadpool); 1804 1805 enum xnn_status xnn_create_global_average_pooling_nwc_f32( 1806 size_t channels, 1807 size_t input_stride, 1808 size_t output_stride, 1809 float output_min, 
// NOTE(review): continuation of xnn_create_global_average_pooling_nwc_f32,
// whose opening parameters appear earlier in this header.
  float output_max,
  uint32_t flags,
  xnn_operator_t* global_average_pooling_op_out);

enum xnn_status xnn_setup_global_average_pooling_nwc_f32(
  xnn_operator_t global_average_pooling_op,
  size_t batch_size,
  size_t width,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// HardSwish activation (NC layout, FP32).
enum xnn_status xnn_create_hardswish_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* hardswish_op_out);

enum xnn_status xnn_setup_hardswish_nc_f32(
  xnn_operator_t hardswish_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Leaky ReLU activation with a single scalar slope (NC layout, FP32).
enum xnn_status xnn_create_leaky_relu_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  float negative_slope,
  uint32_t flags,
  xnn_operator_t* leaky_relu_op_out);

enum xnn_status xnn_setup_leaky_relu_nc_f32(
  xnn_operator_t leaky_relu_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// 2D Max Pooling (NHWC layout, FP32).
enum xnn_status xnn_create_max_pooling2d_nhwc_f32(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t pooling_height,
  uint32_t pooling_width,
  uint32_t stride_height,
  uint32_t stride_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* max_pooling_op_out);

enum xnn_status xnn_setup_max_pooling2d_nhwc_f32(
  xnn_operator_t max_pooling_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Element-wise Maximum with N-dimensional broadcasting (FP32).
enum xnn_status xnn_create_maximum_nd_f32(
  uint32_t flags,
  xnn_operator_t* maximum_op_out);

enum xnn_status xnn_setup_maximum_nd_f32(
  xnn_operator_t maximum_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const float* input1,
  const float* input2,
  float* output,
  pthreadpool_t threadpool);

/// Element-wise Minimum with N-dimensional broadcasting (FP32).
enum xnn_status xnn_create_minimum_nd_f32(
  uint32_t flags,
  xnn_operator_t* minimum_op_out);

enum xnn_status xnn_setup_minimum_nd_f32(
  xnn_operator_t minimum_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const float* input1,
  const float* input2,
  float* output,
  pthreadpool_t threadpool);

/// Element-wise Multiply with N-dimensional broadcasting and output clamping (FP32).
enum xnn_status xnn_create_multiply_nd_f32(
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* multiply_op_out);

enum xnn_status xnn_setup_multiply_nd_f32(
  xnn_operator_t multiply_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const float* input1,
  const float* input2,
  float* output,
  pthreadpool_t threadpool);

/// Negation (NC layout, FP32).
enum xnn_status xnn_create_negate_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* negate_op_out);

enum xnn_status xnn_setup_negate_nc_f32(
  xnn_operator_t negate_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// PReLU with a per-channel slope vector (NC layout, FP32).
enum xnn_status xnn_create_prelu_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  const float* negative_slope,
  uint32_t flags,
  xnn_caches_t caches,
  xnn_operator_t* prelu_op_out);

enum xnn_status xnn_setup_prelu_nc_f32(
  xnn_operator_t prelu_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// 2D bilinear resize (NCHW layout, FP32).
enum xnn_status xnn_create_resize_bilinear2d_nchw_f32(
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  uint32_t flags,
  xnn_operator_t* resize_op_out);

enum xnn_status xnn_setup_resize_bilinear2d_nchw_f32(
  xnn_operator_t resize_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  size_t output_height,
  size_t output_width,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// 2D bilinear resize (NHWC layout, FP32).
enum xnn_status xnn_create_resize_bilinear2d_nhwc_f32(
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  uint32_t flags,
  xnn_operator_t* resize_op_out);

enum xnn_status xnn_setup_resize_bilinear2d_nhwc_f32(
  xnn_operator_t resize_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  size_t output_height,
  size_t output_width,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Sigmoid activation (NC layout, FP32).
enum xnn_status xnn_create_sigmoid_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* sigmoid_op_out);

enum xnn_status xnn_setup_sigmoid_nc_f32(
  xnn_operator_t sigmoid_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// SoftMax (NC layout, FP32).
enum xnn_status xnn_create_softmax_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* softmax_op_out);

enum xnn_status xnn_setup_softmax_nc_f32(
  xnn_operator_t softmax_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Element-wise Square (NC layout, FP32).
enum xnn_status xnn_create_square_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* square_op_out);

enum xnn_status xnn_setup_square_nc_f32(
  xnn_operator_t square_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Element-wise Square Root (NC layout, FP32).
enum xnn_status xnn_create_square_root_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* sqrt_op_out);

enum xnn_status xnn_setup_square_root_nc_f32(
  xnn_operator_t sqrt_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Element-wise Squared Difference with N-dimensional broadcasting (FP32).
enum xnn_status xnn_create_squared_difference_nd_f32(
  uint32_t flags,
  xnn_operator_t* squared_difference_op_out);

enum xnn_status xnn_setup_squared_difference_nd_f32(
  xnn_operator_t squared_difference_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const float* input1,
  const float* input2,
  float* output,
  pthreadpool_t threadpool);

/// Element-wise Subtract with N-dimensional broadcasting and output clamping (FP32).
enum xnn_status xnn_create_subtract_nd_f32(
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* subtract_op_out);

enum xnn_status xnn_setup_subtract_nd_f32(
  xnn_operator_t subtract_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const float* input1,
  const float* input2,
  float* output,
  pthreadpool_t threadpool);

/// Truncation (round toward zero) (NC layout, FP32).
enum xnn_status xnn_create_truncation_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* truncation_op_out);

enum xnn_status xnn_setup_truncation_nc_f32(
  xnn_operator_t truncation_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

#ifndef XNN_NO_NCHW_OPERATORS

/// 2D Convolution (NCHW layout, FP32).
enum xnn_status xnn_create_convolution2d_nchw_f32(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t kernel_height,
  uint32_t kernel_width,
  uint32_t subsampling_height,
  uint32_t subsampling_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  uint32_t groups,
  size_t group_input_channels,
  size_t group_output_channels,
  size_t input_channel_stride,
  size_t output_channel_stride,
  const float* kernel,
  const float* bias,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_caches_t caches,
  xnn_operator_t* convolution_op_out);

enum xnn_status xnn_setup_convolution2d_nchw_f32(
  xnn_operator_t convolution_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Global Average Pooling (NCW layout, FP32).
enum xnn_status xnn_create_global_average_pooling_ncw_f32(
  size_t channels,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* global_average_pooling_op_out);

enum xnn_status xnn_setup_global_average_pooling_ncw_f32(
  xnn_operator_t global_average_pooling_op,
  size_t batch_size,
  size_t width,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

#endif  // XNN_NO_NCHW_OPERATORS

#endif  // XNN_NO_F32_OPERATORS

#ifndef XNN_NO_X32_OPERATORS

/// Channel Shuffle over 32-bit elements (NC layout, type-agnostic).
enum xnn_status xnn_create_channel_shuffle_nc_x32(
  size_t groups,
  size_t group_channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* channel_shuffle_op_out);

enum xnn_status xnn_setup_channel_shuffle_nc_x32(
  xnn_operator_t channel_shuffle_op,
  size_t batch_size,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// N-dimensional Constant Pad over 32-bit elements.
enum xnn_status xnn_create_constant_pad_nd_x32(
  const void* padding_value,
  uint32_t flags,
  xnn_operator_t* constant_pad_op_out);

enum xnn_status xnn_setup_constant_pad_nd_x32(
  xnn_operator_t constant_pad_op,
  size_t num_dims,
  const size_t* input_shape,
  const size_t* pre_padding,
  const size_t* post_padding,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// Copy over 32-bit elements (NC layout).
enum xnn_status xnn_create_copy_nc_x32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* copy_op_out);

enum xnn_status xnn_setup_copy_nc_x32(
  xnn_operator_t copy_op,
  size_t batch_size,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// Depth-to-Space over 32-bit elements (NHWC layout).
enum xnn_status xnn_create_depth_to_space_nhwc_x32(
  size_t output_channels,
  size_t input_channel_stride,
  size_t output_channel_stride,
  uint32_t block_size,
  uint32_t flags,
  xnn_operator_t* depth_to_space_op_out);

enum xnn_status xnn_setup_depth_to_space_nhwc_x32(
  xnn_operator_t depth_to_space_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// Depth-to-Space over 32-bit elements, NCHW input producing NHWC output.
enum xnn_status xnn_create_depth_to_space_nchw2nhwc_x32(
  size_t output_channels,
  size_t input_channel_stride,
  size_t output_channel_stride,
  uint32_t block_size,
  uint32_t flags,
  xnn_operator_t* depth_to_space_op_out);

enum xnn_status xnn_setup_depth_to_space_nchw2nhwc_x32(
  xnn_operator_t depth_to_space_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// Space-to-Depth over 32-bit elements (NHWC layout).
enum xnn_status xnn_create_space_to_depth_nhwc_x32(
  size_t input_channels,
  size_t input_channel_stride,
  size_t output_channel_stride,
  uint32_t block_size,
  uint32_t flags,
  xnn_operator_t* space_to_depth_op_out);

enum xnn_status xnn_setup_space_to_depth_nhwc_x32(
  xnn_operator_t space_to_depth_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// N-dimensional Transpose over 32-bit elements.
enum xnn_status xnn_create_transpose_nd_x32(
  uint32_t flags,
  xnn_operator_t* transpose_op_out);

enum xnn_status xnn_setup_transpose_nd_x32(
  xnn_operator_t transpose_op,
  const void* input,
  void* output,
  const size_t num_dims,
  const size_t* input_shape,
  const size_t* output_perm,
  pthreadpool_t threadpool);

/// One-shot (create+setup+run) N-dimensional Transpose over 32-bit elements.
enum xnn_status xnn_run_transpose_nd_x32(
  uint32_t flags,
  const void* input,
  void* output,
  const size_t num_dims,
  const size_t* input_shape,
  const size_t* output_perm,
  pthreadpool_t threadpool);

/// 2D Unpooling (inverse of max pooling, driven by an index tensor) over 32-bit elements (NHWC layout).
enum xnn_status xnn_create_unpooling2d_nhwc_x32(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t pooling_height,
  uint32_t pooling_width,
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  uint32_t flags,
  xnn_operator_t* unpooling_op_out);

enum xnn_status xnn_setup_unpooling2d_nhwc_x32(
  xnn_operator_t unpooling_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const void* input,
  const uint32_t* index,
  void* output,
  pthreadpool_t threadpool);

#endif  // XNN_NO_X32_OPERATORS

#ifndef XNN_NO_F16_OPERATORS

// F16 operators mirror their F32 counterparts; tensor data is passed as
// void* and holds half-precision (16-bit) floating-point elements.

/// Absolute Value (NC layout, FP16).
enum xnn_status xnn_create_abs_nc_f16(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* abs_op_out);

enum xnn_status xnn_setup_abs_nc_f16(
  xnn_operator_t abs_op,
  size_t batch_size,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// Element-wise Add with N-dimensional broadcasting and output clamping (FP16).
enum xnn_status xnn_create_add_nd_f16(
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* add_op_out);

enum xnn_status xnn_setup_add_nd_f16(
  xnn_operator_t add_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const void* input1,
  const void* input2,
  void* output,
  pthreadpool_t threadpool);

/// 2D Average Pooling (NHWC layout, FP16).
enum xnn_status xnn_create_average_pooling2d_nhwc_f16(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t pooling_height,
  uint32_t pooling_width,
  uint32_t stride_height,
  uint32_t stride_width,
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* average_pooling_op_out);

enum xnn_status xnn_setup_average_pooling2d_nhwc_f16(
  xnn_operator_t average_pooling_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// Bankers' Rounding (round half to even) (NC layout, FP16).
enum xnn_status xnn_create_bankers_rounding_nc_f16(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* rounding_op_out);

enum xnn_status xnn_setup_bankers_rounding_nc_f16(
  xnn_operator_t rounding_op,
  size_t batch_size,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// Ceiling (NC layout, FP16).
enum xnn_status xnn_create_ceiling_nc_f16(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* ceiling_op_out);

enum xnn_status xnn_setup_ceiling_nc_f16(
  xnn_operator_t ceiling_op,
  size_t batch_size,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// Clamp (NC layout, FP16).
enum xnn_status xnn_create_clamp_nc_f16(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* clamp_op_out);

enum xnn_status xnn_setup_clamp_nc_f16(
  xnn_operator_t clamp_op,
  size_t batch_size,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// 2D Convolution (NHWC layout, FP16).
enum xnn_status xnn_create_convolution2d_nhwc_f16(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t kernel_height,
  uint32_t kernel_width,
  uint32_t subsampling_height,
  uint32_t subsampling_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  uint32_t groups,
  size_t group_input_channels,
  size_t group_output_channels,
  size_t input_channel_stride,
  size_t output_channel_stride,
  const void* kernel,
  const void* bias,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_caches_t caches,
  xnn_operator_t* convolution_op_out);

enum xnn_status xnn_setup_convolution2d_nhwc_f16(
  xnn_operator_t convolution_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// 2D Deconvolution (transposed convolution) (NHWC layout, FP16).
enum xnn_status xnn_create_deconvolution2d_nhwc_f16(
  uint32_t output_padding_top,
  uint32_t output_padding_right,
  uint32_t output_padding_bottom,
  uint32_t output_padding_left,
  uint32_t kernel_height,
  uint32_t kernel_width,
  uint32_t stride_height,
  uint32_t stride_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  uint32_t groups,
  size_t group_input_channels,
  size_t group_output_channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  const void* kernel,
  const void* bias,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_caches_t caches,
  xnn_operator_t* deconvolution_op_out);

enum xnn_status xnn_setup_deconvolution2d_nhwc_f16(
  xnn_operator_t deconvolution_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  uint32_t adjustment_height,
  uint32_t adjustment_width,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// Element-wise Divide with N-dimensional broadcasting and output clamping (FP16).
enum xnn_status xnn_create_divide_nd_f16(
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* divide_op_out);

enum xnn_status xnn_setup_divide_nd_f16(
  xnn_operator_t divide_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const void* input1,
  const void* input2,
  void* output,
  pthreadpool_t threadpool);

/// ELU activation (NC layout, FP16).
enum xnn_status xnn_create_elu_nc_f16(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  float alpha,
  uint32_t flags,
  xnn_operator_t* elu_op_out);

enum xnn_status xnn_setup_elu_nc_f16(
  xnn_operator_t elu_op,
  size_t batch_size,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// Floor (NC layout, FP16).
enum xnn_status xnn_create_floor_nc_f16(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* floor_op_out);

enum xnn_status xnn_setup_floor_nc_f16(
  xnn_operator_t floor_op,
  size_t batch_size,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// Fully Connected (NC layout, FP16).
enum xnn_status xnn_create_fully_connected_nc_f16(
  size_t input_channels,
  size_t output_channels,
  size_t input_stride,
  size_t output_stride,
  const void* kernel,
  const void* bias,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_caches_t caches,
  xnn_operator_t* fully_connected_op_out);

enum xnn_status xnn_setup_fully_connected_nc_f16(
  xnn_operator_t fully_connected_op,
  size_t batch_size,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// Global Average Pooling (NWC layout, FP16).
enum xnn_status xnn_create_global_average_pooling_nwc_f16(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* global_average_pooling_op_out);

enum xnn_status xnn_setup_global_average_pooling_nwc_f16(
  xnn_operator_t global_average_pooling_op,
  size_t batch_size,
  size_t width,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// HardSwish activation (NC layout, FP16).
enum xnn_status xnn_create_hardswish_nc_f16(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* hardswish_op_out);

enum xnn_status xnn_setup_hardswish_nc_f16(
  xnn_operator_t hardswish_op,
  size_t batch_size,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// Leaky ReLU activation (NC layout, FP16).
enum xnn_status xnn_create_leaky_relu_nc_f16(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  float negative_slope,
  uint32_t flags,
  xnn_operator_t* leaky_relu_op_out);

enum xnn_status xnn_setup_leaky_relu_nc_f16(
  xnn_operator_t leaky_relu_op,
  size_t batch_size,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// 2D Max Pooling (NHWC layout, FP16).
enum xnn_status xnn_create_max_pooling2d_nhwc_f16(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t pooling_height,
  uint32_t pooling_width,
  uint32_t stride_height,
  uint32_t stride_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* max_pooling_op_out);

enum xnn_status xnn_setup_max_pooling2d_nhwc_f16(
  xnn_operator_t max_pooling_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// Element-wise Maximum with N-dimensional broadcasting (FP16).
enum xnn_status xnn_create_maximum_nd_f16(
  uint32_t flags,
  xnn_operator_t* maximum_op_out);

enum xnn_status xnn_setup_maximum_nd_f16(
  xnn_operator_t maximum_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const void* input1,
  const void* input2,
  void* output,
  pthreadpool_t threadpool);

/// Element-wise Minimum with N-dimensional broadcasting (FP16).
enum xnn_status xnn_create_minimum_nd_f16(
  uint32_t flags,
  xnn_operator_t* minimum_op_out);

enum xnn_status xnn_setup_minimum_nd_f16(
  xnn_operator_t minimum_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const void* input1,
  const void* input2,
  void* output,
  pthreadpool_t threadpool);

/// Element-wise Multiply with N-dimensional broadcasting and output clamping (FP16).
enum xnn_status xnn_create_multiply_nd_f16(
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* multiply_op_out);

enum xnn_status xnn_setup_multiply_nd_f16(
  xnn_operator_t multiply_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const void* input1,
  const void* input2,
  void* output,
  pthreadpool_t threadpool);

/// Negation (NC layout, FP16).
enum xnn_status xnn_create_negate_nc_f16(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* negate_op_out);

enum xnn_status xnn_setup_negate_nc_f16(
  xnn_operator_t negate_op,
  size_t batch_size,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// PReLU with a per-channel slope vector (NC layout, FP16).
enum xnn_status xnn_create_prelu_nc_f16(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  const void* negative_slope,
  uint32_t flags,
  xnn_caches_t caches,
  xnn_operator_t* prelu_op_out);

enum xnn_status xnn_setup_prelu_nc_f16(
  xnn_operator_t prelu_op,
  size_t batch_size,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// 2D bilinear resize (NHWC layout, FP16).
enum xnn_status xnn_create_resize_bilinear2d_nhwc_f16(
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  uint32_t flags,
  xnn_operator_t* resize_op_out);

enum xnn_status xnn_setup_resize_bilinear2d_nhwc_f16(
  xnn_operator_t resize_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  size_t output_height,
  size_t output_width,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// Sigmoid activation (NC layout, FP16).
enum xnn_status xnn_create_sigmoid_nc_f16(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* sigmoid_op_out);

enum xnn_status xnn_setup_sigmoid_nc_f16(
  xnn_operator_t sigmoid_op,
  size_t batch_size,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// SoftMax (NC layout, FP16).
enum xnn_status xnn_create_softmax_nc_f16(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* softmax_op_out);

enum xnn_status xnn_setup_softmax_nc_f16(
  xnn_operator_t softmax_op,
  size_t batch_size,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// Element-wise Square (NC layout, FP16).
enum xnn_status xnn_create_square_nc_f16(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* square_op_out);

enum xnn_status xnn_setup_square_nc_f16(
  xnn_operator_t square_op,
  size_t batch_size,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// Element-wise Square Root (NC layout, FP16).
enum xnn_status xnn_create_square_root_nc_f16(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* sqrt_op_out);

enum xnn_status xnn_setup_square_root_nc_f16(
  xnn_operator_t sqrt_op,
  size_t batch_size,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// Element-wise Squared Difference with N-dimensional broadcasting (FP16).
enum xnn_status xnn_create_squared_difference_nd_f16(
  uint32_t flags,
  xnn_operator_t* squared_difference_op_out);

enum xnn_status xnn_setup_squared_difference_nd_f16(
  xnn_operator_t squared_difference_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const void* input1,
  const void* input2,
  void* output,
  pthreadpool_t threadpool);

/// Element-wise Subtract with N-dimensional broadcasting and output clamping (FP16).
enum xnn_status xnn_create_subtract_nd_f16(
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* subtract_op_out);

enum xnn_status xnn_setup_subtract_nd_f16(
  xnn_operator_t subtract_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const void* input1,
  const void* input2,
  void* output,
  pthreadpool_t threadpool);

/// Truncation (round toward zero) (NC layout, FP16).
enum xnn_status xnn_create_truncation_nc_f16(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* truncation_op_out);

enum xnn_status xnn_setup_truncation_nc_f16(
  xnn_operator_t truncation_op,
  size_t batch_size,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

#endif  // XNN_NO_F16_OPERATORS

#ifndef XNN_NO_X16_OPERATORS

/// N-dimensional Constant Pad over 16-bit elements.
enum xnn_status xnn_create_constant_pad_nd_x16(
  const void* padding_value,
  uint32_t flags,
  xnn_operator_t* constant_pad_op_out);

enum xnn_status xnn_setup_constant_pad_nd_x16(
  xnn_operator_t constant_pad_op,
  size_t num_dims,
  const size_t* input_shape,
  const size_t* pre_padding,
  const size_t* post_padding,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// Copy over 16-bit elements (NC layout).
enum xnn_status xnn_create_copy_nc_x16(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* copy_op_out);

enum xnn_status xnn_setup_copy_nc_x16(
  xnn_operator_t copy_op,
  size_t batch_size,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// Depth-to-Space over 16-bit elements (NHWC layout).
enum xnn_status xnn_create_depth_to_space_nhwc_x16(
  size_t output_channels,
  size_t input_channel_stride,
  size_t output_channel_stride,
  uint32_t block_size,
  uint32_t flags,
  xnn_operator_t* depth_to_space_op_out);

enum xnn_status xnn_setup_depth_to_space_nhwc_x16(
  xnn_operator_t depth_to_space_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// Space-to-Depth over 16-bit elements (NHWC layout).
enum xnn_status xnn_create_space_to_depth_nhwc_x16(
  size_t input_channels,
  size_t input_channel_stride,
  size_t output_channel_stride,
  uint32_t block_size,
  uint32_t flags,
  xnn_operator_t* space_to_depth_op_out);

enum xnn_status xnn_setup_space_to_depth_nhwc_x16(
  xnn_operator_t space_to_depth_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// N-dimensional Transpose over 16-bit elements.
enum xnn_status xnn_create_transpose_nd_x16(
  uint32_t flags,
  xnn_operator_t* transpose_op_out);

enum xnn_status xnn_setup_transpose_nd_x16(
  xnn_operator_t transpose_op,
  const void* input,
  void* output,
  const size_t num_dims,
  const size_t* input_shape,
  const size_t* output_perm,
  pthreadpool_t threadpool);

/// One-shot (create+setup+run) N-dimensional Transpose over 16-bit elements.
enum xnn_status xnn_run_transpose_nd_x16(
  uint32_t flags,
  const void* input,
  void* output,
  const size_t num_dims,
  const size_t* input_shape,
  const size_t* output_perm,
  pthreadpool_t threadpool);

#endif  // XNN_NO_X16_OPERATORS

#ifndef XNN_NO_QC8_OPERATORS

/// 2D Convolution with per-output-channel quantization (NHWC layout, QC8).
enum xnn_status xnn_create_convolution2d_nhwc_qc8(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t kernel_height,
  uint32_t kernel_width,
  uint32_t subsampling_height,
  uint32_t subsampling_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  uint32_t groups,
  size_t group_input_channels,
  size_t group_output_channels,
  size_t input_channel_stride,
  size_t output_channel_stride,
  int8_t input_zero_point,
  float input_scale,
  const float* kernel_scale,
  const int8_t* kernel,
  const int32_t* bias,
  int8_t output_zero_point,
  float output_scale,
  int8_t output_min,
  int8_t output_max,
  uint32_t flags,
  xnn_caches_t caches,
  xnn_operator_t* convolution_op_out);

enum xnn_status xnn_setup_convolution2d_nhwc_qc8(
  xnn_operator_t convolution_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const int8_t* input,
  int8_t* output,
  pthreadpool_t threadpool);

#endif  // XNN_NO_QC8_OPERATORS

#ifndef XNN_NO_QS8_OPERATORS

/// Element-wise Add with N-dimensional broadcasting (signed 8-bit quantized).
enum xnn_status xnn_create_add_nd_qs8(
  int8_t input1_zero_point,
  float input1_scale,
  int8_t input2_zero_point,
  float input2_scale,
  int8_t output_zero_point,
  float output_scale,
  int8_t output_min,
  int8_t output_max,
  uint32_t flags,
  xnn_operator_t* add_op_out);

enum xnn_status xnn_setup_add_nd_qs8(
  xnn_operator_t add_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const int8_t* input1,
  const int8_t* input2,
  int8_t* output,
  pthreadpool_t threadpool);

/// 2D Convolution (NHWC layout, QS8).
enum xnn_status xnn_create_convolution2d_nhwc_qs8(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t kernel_height,
  uint32_t kernel_width,
  uint32_t subsampling_height,
  uint32_t subsampling_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  uint32_t groups,
  size_t group_input_channels,
  size_t group_output_channels,
  size_t input_channel_stride,
  size_t output_channel_stride,
  int8_t input_zero_point,
  float input_scale,
  float kernel_scale,
  const int8_t* kernel,
  const int32_t* bias,
  int8_t output_zero_point,
  float output_scale,
  int8_t output_min,
  int8_t output_max,
  uint32_t flags,
  xnn_caches_t caches,
  xnn_operator_t* convolution_op_out);

enum xnn_status xnn_setup_convolution2d_nhwc_qs8(
  xnn_operator_t convolution_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const int8_t* input,
  int8_t* output,
  pthreadpool_t threadpool);

/// 2D Deconvolution (transposed convolution) (NHWC layout, QS8).
enum xnn_status xnn_create_deconvolution2d_nhwc_qs8(
  uint32_t output_padding_top,
  uint32_t output_padding_right,
  uint32_t output_padding_bottom,
  uint32_t output_padding_left,
  uint32_t kernel_height,
  uint32_t kernel_width,
  uint32_t stride_height,
  uint32_t stride_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  uint32_t groups,
  size_t group_input_channels,
  size_t group_output_channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  int8_t input_zero_point,
  float input_scale,
  float kernel_scale,
  const int8_t* kernel,
  const int32_t* bias,
  int8_t output_zero_point,
  float output_scale,
  int8_t output_min,
  int8_t output_max,
  uint32_t flags,
  xnn_caches_t caches,
  xnn_operator_t* deconvolution_op_out);

enum xnn_status xnn_setup_deconvolution2d_nhwc_qs8(
  xnn_operator_t deconvolution_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  uint32_t adjustment_height,
  uint32_t adjustment_width,
  const int8_t* input,
  int8_t* output,
  pthreadpool_t threadpool);

/// ELU activation (NC layout, QS8).
enum xnn_status xnn_create_elu_nc_qs8(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  float alpha,
  int8_t input_zero_point,
  float input_scale,
  int8_t output_zero_point,
  float output_scale,
  int8_t output_min,
  int8_t output_max,
  uint32_t flags,
  xnn_operator_t* elu_op_out);

enum xnn_status xnn_setup_elu_nc_qs8(
  xnn_operator_t elu_op,
  size_t batch_size,
  const int8_t* input,
  int8_t* output,
  pthreadpool_t threadpool);

/// Fully Connected (NC layout, QS8).
enum xnn_status xnn_create_fully_connected_nc_qs8(
  size_t input_channels,
  size_t output_channels,
  size_t input_stride,
  size_t output_stride,
  int8_t input_zero_point,
  float input_scale,
  float kernel_scale,
  const int8_t* kernel,
  const int32_t* bias,
  int8_t output_zero_point,
  float output_scale,
  int8_t output_min,
  int8_t output_max,
  uint32_t flags,
  xnn_caches_t caches,
  xnn_operator_t* fully_connected_op_out);

enum xnn_status xnn_setup_fully_connected_nc_qs8(
  xnn_operator_t fully_connected_op,
  size_t batch_size,
  const int8_t* input,
  int8_t* output,
  pthreadpool_t threadpool);

/// Global Average Pooling (NWC layout, QS8).
enum xnn_status xnn_create_global_average_pooling_nwc_qs8(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  int8_t input_zero_point,
  float input_scale,
  int8_t output_zero_point,
  float output_scale,
  int8_t output_min,
  int8_t output_max,
  uint32_t flags,
  xnn_operator_t* global_average_pooling_op_out);

enum xnn_status xnn_setup_global_average_pooling_nwc_qs8(
  xnn_operator_t global_average_pooling_op,
  size_t batch_size,
  size_t width,
  const int8_t* input,
  int8_t* output,
  pthreadpool_t threadpool);

/// Element-wise Multiply with N-dimensional broadcasting (QS8).
enum xnn_status xnn_create_multiply_nd_qs8(
  int8_t input1_zero_point,
  float input1_scale,
  int8_t input2_zero_point,
  float input2_scale,
  int8_t output_zero_point,
  float output_scale,
  int8_t output_min,
  int8_t output_max,
  uint32_t flags,
  xnn_operator_t* multiply_op_out);

enum xnn_status xnn_setup_multiply_nd_qs8(
  xnn_operator_t multiply_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const int8_t* input1,
  const int8_t* input2,
  int8_t* output,
  pthreadpool_t threadpool);

/// Leaky ReLU activation (NC layout, QS8).
enum xnn_status xnn_create_leaky_relu_nc_qs8(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  float negative_slope,
  int8_t input_zero_point,
  float input_scale,
  int8_t output_zero_point,
  float output_scale,
  uint32_t flags,
  xnn_operator_t* leaky_relu_op_out);

enum xnn_status xnn_setup_leaky_relu_nc_qs8(
  xnn_operator_t leaky_relu_op,
  size_t batch_size,
  const int8_t* input,
  int8_t* output,
  pthreadpool_t threadpool);

/// Sigmoid activation (NC layout, QS8).
enum xnn_status xnn_create_sigmoid_nc_qs8(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  int8_t input_zero_point,
  float input_scale,
  int8_t output_zero_point,
  float output_scale,
  int8_t output_min,
  int8_t output_max,
  uint32_t flags,
  xnn_operator_t* sigmoid_op_out);

enum xnn_status xnn_setup_sigmoid_nc_qs8(
  xnn_operator_t sigmoid_op,
  size_t batch_size,
  const int8_t* input,
  int8_t* output,
  pthreadpool_t threadpool);

/// Create a Subtract operator for N-dimensional tensors with QS8 datatype.
enum xnn_status xnn_create_subtract_nd_qs8(
  int8_t input1_zero_point, float input1_scale,
  int8_t input2_zero_point, float input2_scale,
  int8_t output_zero_point, float output_scale,
  int8_t output_min, int8_t output_max,
  uint32_t flags,
  xnn_operator_t* subtract_op_out);

/// Set up a previously created QS8 N-dimensional Subtract operator.
enum xnn_status xnn_setup_subtract_nd_qs8(
  xnn_operator_t subtract_op,
  size_t num_input1_dims, const size_t* input1_shape,
  size_t num_input2_dims, const size_t* input2_shape,
  const int8_t* input1,
  const int8_t* input2,
  int8_t* output,
  pthreadpool_t threadpool);

/// Create a TanH operator with NC layout and QS8 datatype.
enum xnn_status xnn_create_tanh_nc_qs8(
  size_t channels,
  size_t input_stride, size_t output_stride,
  int8_t input_zero_point, float input_scale,
  int8_t output_zero_point, float output_scale,
  int8_t output_min, int8_t output_max,
  uint32_t flags,
  xnn_operator_t* tanh_op_out);

/// Set up a previously created QS8 TanH operator.
enum xnn_status xnn_setup_tanh_nc_qs8(
  xnn_operator_t tanh_op,
  size_t batch_size,
  const int8_t* input,
  int8_t* output,
  pthreadpool_t threadpool);

#endif  // XNN_NO_QS8_OPERATORS

// ----------------------------------------------------------------------------
// QU8 (quantized unsigned 8-bit) operators.
// These mirror the QS8 declarations above with uint8_t data and zero points;
// QU8 additionally passes a kernel_zero_point for weighted operators.
// ----------------------------------------------------------------------------
#ifndef XNN_NO_QU8_OPERATORS

/// Create an Add operator for N-dimensional tensors with QU8 datatype.
enum xnn_status xnn_create_add_nd_qu8(
  uint8_t input1_zero_point, float input1_scale,
  uint8_t input2_zero_point, float input2_scale,
  uint8_t output_zero_point, float output_scale,
  uint8_t output_min, uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* add_op_out);

/// Set up a previously created QU8 N-dimensional Add operator.
enum xnn_status xnn_setup_add_nd_qu8(
  xnn_operator_t add_op,
  size_t num_input1_dims, const size_t* input1_shape,
  size_t num_input2_dims, const size_t* input2_shape,
  const uint8_t* input1,
  const uint8_t* input2,
  uint8_t* output,
  pthreadpool_t threadpool);

/// Create a 2D Average Pooling operator with NHWC layout and QU8 datatype.
enum xnn_status xnn_create_average_pooling2d_nhwc_qu8(
  uint32_t input_padding_top, uint32_t input_padding_right,
  uint32_t input_padding_bottom, uint32_t input_padding_left,
  uint32_t pooling_height, uint32_t pooling_width,
  uint32_t stride_height, uint32_t stride_width,
  size_t channels,
  size_t input_pixel_stride, size_t output_pixel_stride,
  uint8_t input_zero_point, float input_scale,
  uint8_t output_zero_point, float output_scale,
  uint8_t output_min, uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* average_pooling_op_out);

/// Set up a previously created QU8 2D Average Pooling operator.
enum xnn_status xnn_setup_average_pooling2d_nhwc_qu8(
  xnn_operator_t average_pooling_op,
  size_t batch_size,
  size_t input_height, size_t input_width,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);

/// Create a 2D Convolution operator with NHWC layout and QU8 datatype.
/// Note: QU8 weights are asymmetric -- a kernel_zero_point accompanies the
/// kernel_scale.
enum xnn_status xnn_create_convolution2d_nhwc_qu8(
  uint32_t input_padding_top, uint32_t input_padding_right,
  uint32_t input_padding_bottom, uint32_t input_padding_left,
  uint32_t kernel_height, uint32_t kernel_width,
  uint32_t subsampling_height, uint32_t subsampling_width,
  uint32_t dilation_height, uint32_t dilation_width,
  uint32_t groups,
  size_t group_input_channels, size_t group_output_channels,
  size_t input_channel_stride, size_t output_channel_stride,
  uint8_t input_zero_point, float input_scale,
  uint8_t kernel_zero_point, float kernel_scale,
  const uint8_t* kernel,
  const int32_t* bias,
  uint8_t output_zero_point, float output_scale,
  uint8_t output_min, uint8_t output_max,
  uint32_t flags,
  xnn_caches_t caches,
  xnn_operator_t* convolution_op_out);

/// Set up a previously created QU8 2D Convolution operator.
enum xnn_status xnn_setup_convolution2d_nhwc_qu8(
  xnn_operator_t convolution_op,
  size_t batch_size,
  size_t input_height, size_t input_width,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);

/// Create a 2D Deconvolution (transposed convolution) operator with NHWC
/// layout and QU8 datatype.
enum xnn_status xnn_create_deconvolution2d_nhwc_qu8(
  uint32_t output_padding_top, uint32_t output_padding_right,
  uint32_t output_padding_bottom, uint32_t output_padding_left,
  uint32_t kernel_height, uint32_t kernel_width,
  uint32_t stride_height, uint32_t stride_width,
  uint32_t dilation_height, uint32_t dilation_width,
  uint32_t groups,
  size_t group_input_channels, size_t group_output_channels,
  size_t input_pixel_stride, size_t output_pixel_stride,
  uint8_t input_zero_point, float input_scale,
  uint8_t kernel_zero_point, float kernel_scale,
  const uint8_t* kernel,
  const int32_t* bias,
  uint8_t output_zero_point, float output_scale,
  uint8_t output_min, uint8_t output_max,
  uint32_t flags,
  xnn_caches_t caches,
  xnn_operator_t* deconvolution_op_out);

/// Set up a previously created QU8 2D Deconvolution operator.
enum xnn_status xnn_setup_deconvolution2d_nhwc_qu8(
  xnn_operator_t deconvolution_op,
  size_t batch_size,
  size_t input_height, size_t input_width,
  uint32_t adjustment_height, uint32_t adjustment_width,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);

/// Create a Fully Connected operator with NC layout and QU8 datatype.
enum xnn_status xnn_create_fully_connected_nc_qu8(
  size_t input_channels, size_t output_channels,
  size_t input_stride, size_t output_stride,
  uint8_t input_zero_point, float input_scale,
  uint8_t kernel_zero_point, float kernel_scale,
  const uint8_t* kernel,
  const int32_t* bias,
  uint8_t output_zero_point, float output_scale,
  uint8_t output_min, uint8_t output_max,
  uint32_t flags,
  xnn_caches_t caches,
  xnn_operator_t* fully_connected_op_out);

/// Set up a previously created QU8 Fully Connected operator.
enum xnn_status xnn_setup_fully_connected_nc_qu8(
  xnn_operator_t fully_connected_op,
  size_t batch_size,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);

/// Create a Global Average Pooling operator with NWC layout and QU8 datatype.
enum xnn_status xnn_create_global_average_pooling_nwc_qu8(
  size_t channels,
  size_t input_stride, size_t output_stride,
  uint8_t input_zero_point, float input_scale,
  uint8_t output_zero_point, float output_scale,
  uint8_t output_min, uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* global_average_pooling_op_out);

/// Set up a previously created QU8 Global Average Pooling operator.
enum xnn_status xnn_setup_global_average_pooling_nwc_qu8(
  xnn_operator_t global_average_pooling_op,
  size_t batch_size,
  size_t width,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);

/// Create a Leaky ReLU operator with NC layout and QU8 datatype.
enum xnn_status xnn_create_leaky_relu_nc_qu8(
  size_t channels,
  size_t input_stride, size_t output_stride,
  float negative_slope,
  uint8_t input_zero_point, float input_scale,
  uint8_t output_zero_point, float output_scale,
  uint32_t flags,
  xnn_operator_t* leaky_relu_op_out);

/// Set up a previously created QU8 Leaky ReLU operator.
enum xnn_status xnn_setup_leaky_relu_nc_qu8(
  xnn_operator_t leaky_relu_op,
  size_t batch_size,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);

/// Create a Multiply operator for N-dimensional tensors with QU8 datatype.
enum xnn_status xnn_create_multiply_nd_qu8(
  uint8_t input1_zero_point, float input1_scale,
  uint8_t input2_zero_point, float input2_scale,
  uint8_t output_zero_point, float output_scale,
  uint8_t output_min, uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* multiply_op_out);

/// Set up a previously created QU8 N-dimensional Multiply operator.
enum xnn_status xnn_setup_multiply_nd_qu8(
  xnn_operator_t multiply_op,
  size_t num_input1_dims, const size_t* input1_shape,
  size_t num_input2_dims, const size_t* input2_shape,
  const uint8_t* input1,
  const uint8_t* input2,
  uint8_t* output,
  pthreadpool_t threadpool);

/// Create a Sigmoid operator with NC layout and QU8 datatype.
enum xnn_status xnn_create_sigmoid_nc_qu8(
  size_t channels,
  size_t input_stride, size_t output_stride,
  uint8_t input_zero_point, float input_scale,
  uint8_t output_zero_point, float output_scale,
  uint8_t output_min, uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* sigmoid_op_out);

/// Set up a previously created QU8 Sigmoid operator.
enum xnn_status xnn_setup_sigmoid_nc_qu8(
  xnn_operator_t sigmoid_op,
  size_t batch_size,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);

/// Create a SoftMax operator with NC layout and QU8 datatype.
/// Note: takes no input_zero_point and no output clamping parameters; the
/// output quantization is fixed by output_zero_point/output_scale only.
enum xnn_status xnn_create_softmax_nc_qu8(
  size_t channels,
  size_t input_stride, size_t output_stride,
  float input_scale,
  uint8_t output_zero_point, float output_scale,
  uint32_t flags,
  xnn_operator_t* softmax_op_out);

/// Set up a previously created QU8 SoftMax operator.
enum xnn_status xnn_setup_softmax_nc_qu8(
  xnn_operator_t softmax_op,
  size_t batch_size,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);

/// Create a Subtract operator for N-dimensional tensors with QU8 datatype.
enum xnn_status xnn_create_subtract_nd_qu8(
  uint8_t input1_zero_point, float input1_scale,
  uint8_t input2_zero_point, float input2_scale,
  uint8_t output_zero_point, float output_scale,
  uint8_t output_min, uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* subtract_op_out);

/// Set up a previously created QU8 N-dimensional Subtract operator.
enum xnn_status xnn_setup_subtract_nd_qu8(
  xnn_operator_t subtract_op,
  size_t num_input1_dims, const size_t* input1_shape,
  size_t num_input2_dims, const size_t* input2_shape,
  const uint8_t* input1,
  const uint8_t* input2,
  uint8_t* output,
  pthreadpool_t threadpool);

/// Create a TanH operator with NC layout and QU8 datatype.
enum xnn_status xnn_create_tanh_nc_qu8(
  size_t channels,
  size_t input_stride, size_t output_stride,
  uint8_t input_zero_point, float input_scale,
  uint8_t output_zero_point, float output_scale,
  uint8_t output_min, uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* tanh_op_out);

/// Set up a previously created QU8 TanH operator.
enum xnn_status xnn_setup_tanh_nc_qu8(
  xnn_operator_t tanh_op,
  size_t batch_size,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);

#endif  // XNN_NO_QU8_OPERATORS

// ----------------------------------------------------------------------------
// S8 (signed 8-bit, non-quantized-parameter) operators.
// ----------------------------------------------------------------------------
#ifndef XNN_NO_S8_OPERATORS

/// Create a Clamp (min/max saturation) operator with NC layout and S8 datatype.
enum xnn_status xnn_create_clamp_nc_s8(
  size_t channels,
  size_t input_stride, size_t output_stride,
  int8_t output_min, int8_t output_max,
  uint32_t flags,
  xnn_operator_t* clamp_op_out);

/// Set up a previously created S8 Clamp operator.
enum xnn_status xnn_setup_clamp_nc_s8(
  xnn_operator_t clamp_op,
  size_t batch_size,
  const int8_t* input,
  int8_t* output,
  pthreadpool_t threadpool);

/// Create a 2D Max Pooling operator with NHWC layout and S8 datatype.
enum xnn_status xnn_create_max_pooling2d_nhwc_s8(
  uint32_t input_padding_top, uint32_t input_padding_right,
  uint32_t input_padding_bottom, uint32_t input_padding_left,
  uint32_t pooling_height, uint32_t pooling_width,
  uint32_t stride_height, uint32_t stride_width,
  uint32_t dilation_height, uint32_t dilation_width,
  size_t channels,
  size_t input_pixel_stride, size_t output_pixel_stride,
  int8_t output_min, int8_t output_max,
  uint32_t flags,
  xnn_operator_t* max_pooling_op_out);

/// Set up a previously created S8 2D Max Pooling operator.
enum xnn_status xnn_setup_max_pooling2d_nhwc_s8(
  xnn_operator_t max_pooling_op,
  size_t batch_size,
  size_t input_height, size_t input_width,
  const int8_t* input,
  int8_t* output,
  pthreadpool_t threadpool);

/// Create a 2D Bilinear Resize operator with NHWC layout and S8 datatype.
enum xnn_status xnn_create_resize_bilinear2d_nhwc_s8(
  size_t channels,
  size_t input_pixel_stride, size_t output_pixel_stride,
  uint32_t flags,
  xnn_operator_t* resize_op_out);

/// Set up a previously created S8 2D Bilinear Resize operator.
/// Output dimensions are given explicitly rather than via a scale factor.
enum xnn_status xnn_setup_resize_bilinear2d_nhwc_s8(
  xnn_operator_t resize_op,
  size_t batch_size,
  size_t input_height, size_t input_width,
  size_t output_height, size_t output_width,
  const int8_t* input,
  int8_t* output,
  pthreadpool_t threadpool);

#endif  // XNN_NO_S8_OPERATORS

// ----------------------------------------------------------------------------
// U8 (unsigned 8-bit) operators: same surface as the S8 section above.
// ----------------------------------------------------------------------------
#ifndef XNN_NO_U8_OPERATORS

/// Create a Clamp operator with NC layout and U8 datatype.
enum xnn_status xnn_create_clamp_nc_u8(
  size_t channels,
  size_t input_stride, size_t output_stride,
  uint8_t output_min, uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* clamp_op_out);

/// Set up a previously created U8 Clamp operator.
enum xnn_status xnn_setup_clamp_nc_u8(
  xnn_operator_t clamp_op,
  size_t batch_size,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);

/// Create a 2D Max Pooling operator with NHWC layout and U8 datatype.
enum xnn_status xnn_create_max_pooling2d_nhwc_u8(
  uint32_t input_padding_top, uint32_t input_padding_right,
  uint32_t input_padding_bottom, uint32_t input_padding_left,
  uint32_t pooling_height, uint32_t pooling_width,
  uint32_t stride_height, uint32_t stride_width,
  uint32_t dilation_height, uint32_t dilation_width,
  size_t channels,
  size_t input_pixel_stride, size_t output_pixel_stride,
  uint8_t output_min, uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* max_pooling_op_out);

/// Set up a previously created U8 2D Max Pooling operator.
enum xnn_status xnn_setup_max_pooling2d_nhwc_u8(
  xnn_operator_t max_pooling_op,
  size_t batch_size,
  size_t input_height, size_t input_width,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);

/// Create a 2D Bilinear Resize operator with NHWC layout and U8 datatype.
enum xnn_status xnn_create_resize_bilinear2d_nhwc_u8(
  size_t channels,
  size_t input_pixel_stride, size_t output_pixel_stride,
  uint32_t flags,
  xnn_operator_t* resize_op_out);

/// Set up a previously created U8 2D Bilinear Resize operator.
enum xnn_status xnn_setup_resize_bilinear2d_nhwc_u8(
  xnn_operator_t resize_op,
  size_t batch_size,
  size_t input_height, size_t input_width,
  size_t output_height, size_t output_width,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);

#endif  // XNN_NO_U8_OPERATORS

// ----------------------------------------------------------------------------
// X8 (datatype-agnostic 8-bit) operators: pure data movement, so buffers are
// typed void* and no quantization parameters are needed.
// ----------------------------------------------------------------------------
#ifndef XNN_NO_X8_OPERATORS

/// Create a Copy operator with NC layout operating on 8-bit elements.
enum xnn_status xnn_create_copy_nc_x8(
  size_t channels,
  size_t input_stride, size_t output_stride,
  uint32_t flags,
  xnn_operator_t* copy_op_out);

/// Set up a previously created X8 Copy operator.
enum xnn_status xnn_setup_copy_nc_x8(
  xnn_operator_t copy_op,
  size_t batch_size,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// Create a Channel Shuffle operator with NC layout on 8-bit elements.
enum xnn_status xnn_create_channel_shuffle_nc_x8(
  size_t groups,
  size_t group_channels,
  size_t input_stride, size_t output_stride,
  uint32_t flags,
  xnn_operator_t* channel_shuffle_op_out);

/// Set up a previously created X8 Channel Shuffle operator.
enum xnn_status xnn_setup_channel_shuffle_nc_x8(
  xnn_operator_t channel_shuffle_op,
  size_t batch_size,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// Create a Constant Pad operator for N-dimensional tensors of 8-bit elements.
/// padding_value points at the 8-bit value used to fill the padded region.
enum xnn_status xnn_create_constant_pad_nd_x8(
  const void* padding_value,
  uint32_t flags,
  xnn_operator_t* constant_pad_op_out);

/// Set up a previously created X8 Constant Pad operator.
/// pre_padding/post_padding give the number of leading/trailing padding
/// elements per dimension.
enum xnn_status xnn_setup_constant_pad_nd_x8(
  xnn_operator_t constant_pad_op,
  size_t num_dims,
  const size_t* input_shape,
  const size_t* pre_padding,
  const size_t* post_padding,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// Create a Depth-to-Space operator with NHWC layout on 8-bit elements.
enum xnn_status xnn_create_depth_to_space_nhwc_x8(
  size_t output_channels,
  size_t input_channel_stride, size_t output_channel_stride,
  uint32_t block_size,
  uint32_t flags,
  xnn_operator_t* depth_to_space_op_out);

/// Set up a previously created X8 Depth-to-Space operator.
enum xnn_status xnn_setup_depth_to_space_nhwc_x8(
  xnn_operator_t depth_to_space_op,
  size_t batch_size,
  size_t input_height, size_t input_width,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// Create a Space-to-Depth operator with NHWC layout on 8-bit elements.
enum xnn_status xnn_create_space_to_depth_nhwc_x8(
  size_t input_channels,
  size_t input_channel_stride, size_t output_channel_stride,
  uint32_t block_size,
  uint32_t flags,
  xnn_operator_t* space_to_depth_op_out);

/// Set up a previously created X8 Space-to-Depth operator.
enum xnn_status xnn_setup_space_to_depth_nhwc_x8(
  xnn_operator_t space_to_depth_op,
  size_t batch_size,
  size_t input_height, size_t input_width,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// Create an N-dimensional Transpose operator on 8-bit elements.
enum xnn_status xnn_create_transpose_nd_x8(
  uint32_t flags,
  xnn_operator_t* transpose_op_out);

/// Set up a previously created X8 Transpose operator.
/// output_perm gives the permutation applied to the input dimensions.
enum xnn_status xnn_setup_transpose_nd_x8(
  xnn_operator_t transpose_op,
  const void* input,
  void* output,
  const size_t num_dims,
  const size_t* input_shape,
  const size_t* output_perm,
  pthreadpool_t threadpool);

/// One-shot variant of the X8 Transpose: create, set up, and run in one call
/// (no xnn_operator_t is retained by the caller).
enum xnn_status xnn_run_transpose_nd_x8(
  uint32_t flags,
  const void* input,
  void* output,
  const size_t num_dims,
  const size_t* input_shape,
  const size_t* output_perm,
  pthreadpool_t threadpool);

#endif  // XNN_NO_X8_OPERATORS

// ----------------------------------------------------------------------------
// CVT (datatype conversion) operators, all with NC layout.
// Naming: xnn_*_convert_nc_<from>_<to>; a single-datatype suffix (qs8, qu8)
// denotes requantization within the same storage type.
// ----------------------------------------------------------------------------
#ifndef XNN_NO_CVT_OPERATORS

/// Create a Convert operator from FP16 (passed as void*) to FP32.
enum xnn_status xnn_create_convert_nc_f16_f32(
  size_t channels,
  size_t input_stride, size_t output_stride,
  uint32_t flags,
  xnn_operator_t* convert_op_out);

/// Set up a previously created FP16->FP32 Convert operator.
enum xnn_status xnn_setup_convert_nc_f16_f32(
  xnn_operator_t convert_op,
  size_t batch_size,
  const void* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a Convert operator from FP32 to FP16 (stored via void*).
enum xnn_status xnn_create_convert_nc_f32_f16(
  size_t channels,
  size_t input_stride, size_t output_stride,
  uint32_t flags,
  xnn_operator_t* convert_op_out);

/// Set up a previously created FP32->FP16 Convert operator.
enum xnn_status xnn_setup_convert_nc_f32_f16(
  xnn_operator_t convert_op,
  size_t batch_size,
  const float* input,
  void* output,
  pthreadpool_t threadpool);

/// Create a Convert (quantize) operator from FP32 to QS8.
enum xnn_status xnn_create_convert_nc_f32_qs8(
  size_t channels,
  size_t input_stride, size_t output_stride,
  float output_scale,
  int8_t output_zero_point,
  int8_t output_min, int8_t output_max,
  uint32_t flags,
  xnn_operator_t* convert_op_out);

/// Set up a previously created FP32->QS8 Convert operator.
enum xnn_status xnn_setup_convert_nc_f32_qs8(
  xnn_operator_t convert_op,
  size_t batch_size,
  const float* input,
  int8_t* output,
  pthreadpool_t threadpool);

/// Create a Convert (quantize) operator from FP32 to QU8.
enum xnn_status xnn_create_convert_nc_f32_qu8(
  size_t channels,
  size_t input_stride, size_t output_stride,
  float output_scale,
  uint8_t output_zero_point,
  uint8_t output_min, uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* convert_op_out);

/// Set up a previously created FP32->QU8 Convert operator.
enum xnn_status xnn_setup_convert_nc_f32_qu8(
  xnn_operator_t convert_op,
  size_t batch_size,
  const float* input,
  uint8_t* output,
  pthreadpool_t threadpool);

/// Create a QS8->QS8 Convert (requantize) operator: both input and output
/// quantization parameters are specified.
enum xnn_status xnn_create_convert_nc_qs8(
  size_t channels,
  size_t input_stride, size_t output_stride,
  float input_scale, int8_t input_zero_point,
  float output_scale, int8_t output_zero_point,
  uint32_t flags,
  xnn_operator_t* convert_op_out);

/// Set up a previously created QS8 requantizing Convert operator.
enum xnn_status xnn_setup_convert_nc_qs8(
  xnn_operator_t convert_op,
  size_t batch_size,
  const int8_t* input,
  int8_t* output,
  pthreadpool_t threadpool);

/// Create a Convert (dequantize) operator from QS8 to FP32.
enum xnn_status xnn_create_convert_nc_qs8_f32(
  size_t channels,
  size_t input_stride, size_t output_stride,
  float input_scale, int8_t input_zero_point,
  uint32_t flags,
  xnn_operator_t* convert_op_out);

/// Set up a previously created QS8->FP32 Convert operator.
enum xnn_status xnn_setup_convert_nc_qs8_f32(
  xnn_operator_t convert_op,
  size_t batch_size,
  const int8_t* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a QU8->QU8 Convert (requantize) operator.
enum xnn_status xnn_create_convert_nc_qu8(
  size_t channels,
  size_t input_stride, size_t output_stride,
  float input_scale, uint8_t input_zero_point,
  float output_scale, uint8_t output_zero_point,
  uint32_t flags,
  xnn_operator_t* convert_op_out);

/// Set up a previously created QU8 requantizing Convert operator.
enum xnn_status xnn_setup_convert_nc_qu8(
  xnn_operator_t convert_op,
  size_t batch_size,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);

/// Create a Convert (dequantize) operator from QU8 to FP32.
enum xnn_status xnn_create_convert_nc_qu8_f32(
  size_t channels,
  size_t input_stride, size_t output_stride,
  float input_scale, uint8_t input_zero_point,
  uint32_t flags,
  xnn_operator_t* convert_op_out);
3878 3879 enum xnn_status xnn_setup_convert_nc_qu8_f32( 3880 xnn_operator_t convert_op, 3881 size_t batch_size, 3882 const uint8_t* input, 3883 float* output, 3884 pthreadpool_t threadpool); 3885 3886 #endif // XNN_NO_CVT_OPERATORS 3887 3888 #ifdef __cplusplus 3889 } // extern "C" 3890 #endif 3891