// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#include <pthreadpool.h>

#ifdef __cplusplus
extern "C" {
#endif

/// The number of bytes XNNPACK may read beyond array bounds.
/// The caller must allocate at least this many extra bytes after the tensor data passed to XNNPACK.
///
/// Note: XNNPACK reads, but never writes beyond array bounds.
#define XNN_EXTRA_BYTES 16

/// Maximum number of dimensions in tensor shape.
#define XNN_MAX_TENSOR_DIMS 6

/// Allow sparse inference in a Runtime.
///
/// Note: this flag forces XNNPACK to consider sparse inference, but does not guarantee it.
#define XNN_FLAG_SPARSE_INFERENCE 0x00000001

/// Allow IEEE FP16 inference in a Runtime.
///
/// Note: this flag forces XNNPACK to consider IEEE FP16 inference, but does not guarantee it.
#define XNN_FLAG_FP16_INFERENCE 0x00000002

/// The convolution operator represents a depthwise convolution, and uses HWGo layout for filters.
#define XNN_FLAG_DEPTHWISE_CONVOLUTION 0x00000001

/// Assume transposed weights in a fully connected operator.
#define XNN_FLAG_TRANSPOSE_WEIGHTS 0x00000001

/// The operator assumes NHWC layout for the input, regardless of the output layout.
#define XNN_FLAG_INPUT_NHWC 0x00000002

/// Match "SAME" padding in TensorFlow. Exact padding values are computed dynamically depending on input size.
#define XNN_FLAG_TENSORFLOW_SAME_PADDING 0x00000004

/// Implicitly flatten and reshape input of a Fully Connected operator into a 2D tensor.
#define XNN_FLAG_TENSORFLOW_RESHAPE_2D 0x00000004

/// Match behaviour of TensorFlow 1.x.
#define XNN_FLAG_TENSORFLOW_LEGACY_MODE 0x00000004

/// Static weights of the FP16 operator are in FP32 format.
#define XNN_FLAG_FP32_STATIC_WEIGHTS 0x00000008

/// Align corners of input and output images in resize operations.
#define XNN_FLAG_ALIGN_CORNERS 0x00000008

/// Yield worker threads of the thread pool to the system scheduler after the inference.
#define XNN_FLAG_YIELD_WORKERS 0x00000010

/// Status code for any XNNPACK function call.
enum xnn_status {
  /// The call succeeded, and all output arguments now contain valid data.
  xnn_status_success = 0,
  xnn_status_uninitialized = 1,
  xnn_status_invalid_parameter = 2,
  xnn_status_invalid_state = 3,
  xnn_status_unsupported_parameter = 4,
  xnn_status_unsupported_hardware = 5,
  xnn_status_out_of_memory = 6,
};

/// User-pluggable memory manager. All function pointers must be non-NULL if the structure is provided.
struct xnn_allocator {
  /// User-specified pointer that will be passed as-is to all functions in this structure.
  void* context;
  /// Pointer to a function to be called for general memory allocation.
  ///
  /// @param context - The user-specified pointer from xnn_allocator structure.
  /// @param size - The size of the memory block to allocate, in bytes.
  ///
  /// @returns Pointer to the allocated memory block of at least @ref size bytes.
  ///          If allocation fails, the function must return NULL.
  void* (*allocate)(void* context, size_t size);
  /// Pointer to a function to be called for general memory re-allocation, i.e. to increase or shrink a previously
  /// allocated memory block. The content of the old memory block is copied to the new memory block.
  ///
  /// @param context - The user-specified pointer from xnn_allocator structure.
  /// @param pointer - Pointer to a memory block allocated by @ref allocate or @ref reallocate functions. Can be NULL.
  ///                  If the pointer is NULL, the @ref reallocate call is equivalent to an @ref allocate call.
  /// @param size - The new size of the memory block to allocate, in bytes.
  ///
  /// @returns Pointer to the newly allocated memory block of at least @ref size bytes with the content of the previous
  ///          memory block.
  ///          If allocation fails, the function must return NULL, but must not release the previous memory block.
  void* (*reallocate)(void* context, void* pointer, size_t size);
  /// Pointer to a function to be called for general memory de-allocation.
  ///
  /// @param context - The user-specified pointer from xnn_allocator structure.
  /// @param pointer - Pointer to a memory block allocated by @ref allocate or @ref reallocate functions. Can be NULL.
  ///                  If the pointer is NULL, the @ref deallocate call is a no-op.
  void (*deallocate)(void* context, void* pointer);
  /// Pointer to a function to be called for aligned memory allocation.
  ///
  /// @param context - The user-specified pointer from xnn_allocator structure.
  /// @param alignment - The alignment of the memory block to allocate, in bytes. Alignment is always a power-of-2.
  /// @param size - The size of the memory block to allocate, in bytes.
  ///
  /// @returns Pointer to the allocated memory block of at least @ref size bytes.
  ///          If allocation fails, the function must return NULL.
  void* (*aligned_allocate)(void* context, size_t alignment, size_t size);
  /// Pointer to a function to be called for aligned memory de-allocation.
  ///
  /// @param context - The user-specified pointer from xnn_allocator structure.
  /// @param pointer - Pointer to a memory block allocated by @ref aligned_allocate function. Can be NULL.
  ///                  If the pointer is NULL, the @ref aligned_deallocate call is a no-op.
  void (*aligned_deallocate)(void* context, void* pointer);
};

/// Initialize XNNPACK library.
///
/// XNNPACK must be successfully initialized before use. During initialization, XNNPACK populates internal structures
/// depending on the host processor. Initialization can be time-consuming.
///
/// @param[in] allocator - structure with function pointers to be used for memory allocation and de-allocation.
///                        If this argument is NULL, system-provided memory management functions (e.g. malloc/free)
///                        will be used.
///
/// @retval xnn_status_success - XNNPACK is successfully initialized and ready to use.
/// @retval xnn_status_out_of_memory - initialization failed due to out-of-memory condition.
/// @retval xnn_status_unsupported_hardware - initialization failed because the host processor does not satisfy the
///                                           minimum hardware requirements for XNNPACK. E.g. this may happen on x86
///                                           processors without SSE2 extension, or on 32-bit ARM processors without
///                                           the NEON SIMD extension.
enum xnn_status xnn_initialize(const struct xnn_allocator* allocator);

/// Deinitialize XNNPACK library.
///
/// To avoid memory and resource leaks, users must call xnn_deinitialize once for each successful xnn_initialize call.
///
/// @retval xnn_status_success - deinitialization call succeeded.
enum xnn_status xnn_deinitialize(void);

/// Subgraph is an abstract representation of a neural network model.
/// Subgraph objects are used to define Values (tensors) and Nodes (operators) comprising the model.
typedef struct xnn_subgraph* xnn_subgraph_t;

/// Create an empty Subgraph object.
///
/// @param external_value_ids - number of Value IDs to reserve for communication with external graph representation.
///                             The Subgraph object would avoid creating internal Value IDs in the
///                             [0, external_value_ids-1] range.
/// @param flags - binary features of the subgraph. No supported flags are currently defined.
/// @param subgraph_out - pointer to the variable that will be initialized with a handle to the Subgraph object upon
///                       successful return.
enum xnn_status xnn_create_subgraph(
  uint32_t external_value_ids,
  uint32_t flags,
  xnn_subgraph_t* subgraph_out);

/// Destroy a Subgraph object, as well as Values, and Nodes associated with the subgraph.
///
/// @param subgraph - the Subgraph object to destroy.
enum xnn_status xnn_delete_subgraph(
  xnn_subgraph_t subgraph);

#define XNN_VALUE_FLAG_EXTERNAL_INPUT  0x00000001
#define XNN_VALUE_FLAG_EXTERNAL_OUTPUT 0x00000002

#define XNN_INVALID_VALUE_ID UINT32_MAX

/// Type of elements in a Value object.
enum xnn_datatype {
  /// Invalid data type. Valid Values never have this datatype.
  xnn_datatype_invalid = 0,
  /// IEEE754 single-precision floating-point.
  xnn_datatype_fp32 = 1,
  /// IEEE754 half-precision floating-point.
  xnn_datatype_fp16 = 2,
  /// Quantized 8-bit signed integer with shared per-Value quantization parameters.
  xnn_datatype_qint8 = 3,
  /// Quantized 8-bit unsigned integer with shared per-Value quantization parameters.
  xnn_datatype_quint8 = 4,
  /// Quantized 32-bit signed integer with shared per-Value quantization parameters.
  xnn_datatype_qint32 = 5,
  /// Quantized 8-bit signed integer with shared per-channel quantization parameters.
  xnn_datatype_qcint8 = 6,
  /// Quantized 32-bit signed integer with shared per-channel quantization parameters.
  xnn_datatype_qcint32 = 7,
};

/// Define a tensor-type Value and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Value.
/// @param datatype - type of the tensor elements.
/// @param num_dims - number of dimensions in the shape.
/// @param dims - pointer to an array of @a num_dims shape dimensions. If num_dims is 0, this pointer can be NULL.
///               XNNPACK does not keep any pointers to this array after the function returns.
/// @param data - pointer to static data used for tensor initialization. If the tensor is not statically initialized,
///               this pointer must be NULL. If non-NULL, the life-time of the static data must exceed the life-time
///               of the Subgraph object, and of any Runtime objects created from the Subgraph.
/// @param external_id - external ID for the Value. The ID must be within the range of reserved Value IDs specified on
///                      the Subgraph creation. If the external ID is XNN_INVALID_VALUE_ID, an internal ID will be
///                      created for the Value.
/// @param flags - binary features of the Value. Supported values are any combination of XNN_VALUE_FLAG_EXTERNAL_INPUT
///                and XNN_VALUE_FLAG_EXTERNAL_OUTPUT.
/// @param id_out - pointer to the variable that will be initialized with the Value ID upon successful return. If a
///                 valid @a external_id was provided, the variable will be initialized with the @a external_id value.
enum xnn_status xnn_define_tensor_value(
  xnn_subgraph_t subgraph,
  enum xnn_datatype datatype,
  size_t num_dims,
  const size_t* dims,
  const void* data,
  uint32_t external_id,
  uint32_t flags,
  uint32_t* id_out);

/// Define a quantized tensor-type Value and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Value.
/// @param datatype - type of the tensor elements.
/// @param zero_point - offset from zero to subtract from the quantized elements in the Value.
/// @param scale - multiplication factor to convert quantized elements to real representation.
/// @param num_dims - number of dimensions in the shape.
/// @param dims - pointer to an array of @a num_dims shape dimensions. If num_dims is 0, this pointer can be NULL.
///               XNNPACK does not keep any pointers to this array after the function returns.
/// @param data - pointer to static data used for tensor initialization. If the tensor is not statically initialized,
///               this pointer must be NULL. If non-NULL, the life-time of the static data must exceed the life-time
///               of the Subgraph object, and of any Runtime objects created from the Subgraph.
/// @param external_id - external ID for the Value. The ID must be within the range of reserved Value IDs specified on
///                      the Subgraph creation. If the external ID is XNN_INVALID_VALUE_ID, an internal ID will be
///                      created for the Value.
/// @param flags - binary features of the Value. Supported values are any combination of XNN_VALUE_FLAG_EXTERNAL_INPUT
///                and XNN_VALUE_FLAG_EXTERNAL_OUTPUT.
/// @param id_out - pointer to the variable that will be initialized with the Value ID upon successful return. If a
///                 valid @a external_id was provided, the variable will be initialized with the @a external_id value.
enum xnn_status xnn_define_quantized_tensor_value(
  xnn_subgraph_t subgraph,
  enum xnn_datatype datatype,
  int32_t zero_point,
  float scale,
  size_t num_dims,
  const size_t* dims,
  const void* data,
  uint32_t external_id,
  uint32_t flags,
  uint32_t* id_out);

/// Define a channelwise quantized tensor-type Value and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Value.
/// @param datatype - type of the tensor elements.
/// @param scale - per-channel multiplication factors to convert quantized elements to real representation.
/// @param num_dims - number of dimensions in the shape.
/// @param channel_dim - index of the channel dimension in the tensor with per-channel quantization parameters.
///                      Typically this is the first dimension (dimension #0) of the filter tensors in the Convolution,
///                      Deconvolution, and Fully Connected operators and the last dimension of the filter tensors in
///                      the Depthwise Convolution operators.
/// @param dims - pointer to an array of @a num_dims shape dimensions. If num_dims is 0, this pointer can be NULL.
///               XNNPACK does not keep any pointers to this array after the function returns.
/// @param data - pointer to static data used for tensor initialization. If the tensor is not statically initialized,
///               this pointer must be NULL. If non-NULL, the life-time of the static data must exceed the life-time
///               of the Subgraph object, and of any Runtime objects created from the Subgraph.
/// @param external_id - external ID for the Value. The ID must be within the range of reserved Value IDs specified on
///                      the Subgraph creation. If the external ID is XNN_INVALID_VALUE_ID, an internal ID will be
///                      created for the Value.
/// @param flags - binary features of the Value. Supported values are any combination of XNN_VALUE_FLAG_EXTERNAL_INPUT
///                and XNN_VALUE_FLAG_EXTERNAL_OUTPUT.
/// @param id_out - pointer to the variable that will be initialized with the Value ID upon successful return. If a
///                 valid @a external_id was provided, the variable will be initialized with the @a external_id value.
enum xnn_status xnn_define_channelwise_quantized_tensor_value(
  xnn_subgraph_t subgraph,
  enum xnn_datatype datatype,
  const float* scale,
  size_t num_dims,
  size_t channel_dim,
  const size_t* dims,
  const void* data,
  uint32_t external_id,
  uint32_t flags,
  uint32_t* id_out);

/// Define a Convert Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
///                    shape must match the shape of the input tensor.
/// @param flags - binary features of the Convert Node. No supported flags are currently defined.
enum xnn_status xnn_define_convert(
  xnn_subgraph_t subgraph,
  uint32_t input_id,
  uint32_t output_id,
  uint32_t flags);

/// Define a 2D Convolution Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param input_padding_top - implicit zero-padding above 2D input data. Must be 0 if XNN_FLAG_TENSORFLOW_SAME_PADDING
///                            flag is specified.
/// @param input_padding_right - implicit zero-padding to the right of 2D input data. Must be 0 if
///                              XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
/// @param input_padding_bottom - implicit zero-padding below 2D input data. Must be 0 if
///                               XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
/// @param input_padding_left - implicit zero-padding to the left of 2D input data. Must be 0 if
///                             XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
/// @param kernel_height - kernel (filter) height.
/// @param kernel_width - kernel (filter) width.
/// @param subsampling_height - height of subsampling region for convolution output (convolution height stride).
/// @param subsampling_width - width of subsampling region for convolution output (convolution width stride).
/// @param dilation_height - dilation of kernel elements along the height dimension.
/// @param dilation_width - dilation of kernel elements along the width dimension.
/// @param groups - number of convolution groups.
/// @param group_input_channels - number of input channels per group.
/// @param group_output_channels - number of output channels per group.
/// @param output_min - lower bound for clipping output values.
/// @param output_max - upper bound for clipping output values.
/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
///                   with [N, IH, IW, groups * group_input_channels] dimensions.
/// @param filter_id - Value ID for the filter tensor. The filter tensor must be a 4D tensor defined in the @a subgraph
///                    with [groups * group_output_channels, kernel_height, kernel_width, group_input_channels]
///                    dimensions.
/// @param bias_id - Value ID for the bias tensor. The bias tensor must be a 1D tensor defined in the @a subgraph with
///                  [groups * group_output_channels] dimensions.
/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
///                    with [N, OH, OW, groups * group_output_channels] dimensions.
/// @param flags - binary features of the 2D Convolution Node. The only currently supported value is
///                XNN_FLAG_TENSORFLOW_SAME_PADDING.
enum xnn_status xnn_define_convolution_2d(
  xnn_subgraph_t subgraph,
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t kernel_height,
  uint32_t kernel_width,
  uint32_t subsampling_height,
  uint32_t subsampling_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  uint32_t groups,
  size_t group_input_channels,
  size_t group_output_channels,
  float output_min,
  float output_max,
  uint32_t input_id,
  uint32_t filter_id,
  uint32_t bias_id,
  uint32_t output_id,
  uint32_t flags);

/// Define a 2D Deconvolution (Transposed Convolution) Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param padding_top - implicit padding above 2D output data.
/// @param padding_right - implicit padding to the right of 2D output data.
/// @param padding_bottom - implicit padding below 2D output data.
/// @param padding_left - implicit padding to the left of 2D output data.
/// @param adjustment_height - additional elements in the bottom of the 2D output data.
/// @param adjustment_width - additional elements to the right of the 2D output data.
/// @param kernel_height - kernel (filter) height.
/// @param kernel_width - kernel (filter) width.
/// @param upsampling_height - height of upsampling region for deconvolution input (deconvolution height stride).
/// @param upsampling_width - width of upsampling region for deconvolution input (deconvolution width stride).
/// @param dilation_height - dilation of kernel elements along the height dimension.
/// @param dilation_width - dilation of kernel elements along the width dimension.
/// @param groups - number of convolution groups.
/// @param group_input_channels - number of input channels per group.
/// @param group_output_channels - number of output channels per group.
/// @param output_min - lower bound for clipping output values.
/// @param output_max - upper bound for clipping output values.
/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
///                   with [N, IH, IW, groups * group_input_channels] dimensions.
/// @param filter_id - Value ID for the filter tensor. The filter tensor must be a 4D tensor defined in the @a subgraph
///                    with [groups * group_output_channels, kernel_height, kernel_width, group_input_channels]
///                    dimensions.
/// @param bias_id - Value ID for the bias tensor, or XNN_INVALID_VALUE_ID for a 2D Deconvolution Node without a bias.
///                  If present, the bias tensor must be a 1D tensor defined in the @a subgraph with
///                  [groups * group_output_channels] dimensions.
/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
///                    with [N, OH, OW, groups * group_output_channels] dimensions.
/// @param flags - binary features of the 2D Deconvolution Node. No supported flags are currently defined.
enum xnn_status xnn_define_deconvolution_2d(
  xnn_subgraph_t subgraph,
  uint32_t padding_top,
  uint32_t padding_right,
  uint32_t padding_bottom,
  uint32_t padding_left,
  uint32_t adjustment_height,
  uint32_t adjustment_width,
  uint32_t kernel_height,
  uint32_t kernel_width,
  uint32_t upsampling_height,
  uint32_t upsampling_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  uint32_t groups,
  size_t group_input_channels,
  size_t group_output_channels,
  float output_min,
  float output_max,
  uint32_t input_id,
  uint32_t filter_id,
  uint32_t bias_id,
  uint32_t output_id,
  uint32_t flags);

/// Define a 2D Depthwise Convolution Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param input_padding_top - implicit zero-padding above 2D input data. Must be 0 if XNN_FLAG_TENSORFLOW_SAME_PADDING
///                            flag is specified.
/// @param input_padding_right - implicit zero-padding to the right of 2D input data. Must be 0 if
///                              XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
/// @param input_padding_bottom - implicit zero-padding below 2D input data. Must be 0 if
///                               XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
/// @param input_padding_left - implicit zero-padding to the left of 2D input data. Must be 0 if
///                             XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
/// @param kernel_height - kernel (filter) height.
/// @param kernel_width - kernel (filter) width.
/// @param subsampling_height - height of subsampling region for convolution output (convolution height stride).
/// @param subsampling_width - width of subsampling region for convolution output (convolution width stride).
/// @param dilation_height - dilation of kernel elements along the height dimension.
/// @param dilation_width - dilation of kernel elements along the width dimension.
/// @param depth_multiplier - ratio of output channels to input channels.
/// @param input_channels - number of input channels.
/// @param output_min - lower bound for clipping output values.
/// @param output_max - upper bound for clipping output values.
/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
///                   with [N, IH, IW, input_channels] dimensions.
/// @param filter_id - Value ID for the filter tensor. The filter tensor must be a 4D tensor defined in the @a subgraph
///                    with [1, kernel_height, kernel_width, input_channels * depth_multiplier] dimensions.
/// @param bias_id - Value ID for the bias tensor, or XNN_INVALID_VALUE_ID for a 2D Depthwise Convolution Node without
///                  a bias. If present, the bias tensor must be a 1D tensor defined in the @a subgraph with
///                  [input_channels * depth_multiplier] dimensions.
/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
///                    with [N, OH, OW, input_channels * depth_multiplier] dimensions.
/// @param flags - binary features of the 2D Depthwise Convolution Node. The only currently supported value is
///                XNN_FLAG_TENSORFLOW_SAME_PADDING.
446 enum xnn_status xnn_define_depthwise_convolution_2d( 447 xnn_subgraph_t subgraph, 448 uint32_t input_padding_top, 449 uint32_t input_padding_right, 450 uint32_t input_padding_bottom, 451 uint32_t input_padding_left, 452 uint32_t kernel_height, 453 uint32_t kernel_width, 454 uint32_t subsampling_height, 455 uint32_t subsampling_width, 456 uint32_t dilation_height, 457 uint32_t dilation_width, 458 uint32_t depth_multiplier, 459 size_t input_channels, 460 float output_min, 461 float output_max, 462 uint32_t input_id, 463 uint32_t filter_id, 464 uint32_t bias_id, 465 uint32_t output_id, 466 uint32_t flags); 467 468 /// Define a Depth To Space Node and add it to a Subgraph. 469 /// 470 /// The Depth To Space Node rearranges data from depth into blocks of spatial data (a reverse transform to 471 /// Space To Depth). For a given input pixel, an output square of pixels with side @a block_size is formed from values 472 /// in the corresponding number of its channels. The output depth is therefore @a block_size x @a block_size times 473 /// smaller than that of the input. 474 /// 475 /// @param subgraph - a Subgraph object that will own the created Node. 476 /// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph 477 /// with [N, IH, IW, OC * block_size * block_size] dimensions. 478 /// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph 479 /// with [N, IH * block_size, IW * block_size, OC] dimensions. 480 /// @param block_size - the size of the spatial block. 481 /// @param flags - binary features of the input_channels Node. No supported flags are currently defined. 482 enum xnn_status xnn_define_depth_to_space( 483 xnn_subgraph_t subgraph, 484 uint32_t input_id, 485 uint32_t output_id, 486 uint32_t block_size, 487 uint32_t flags); 488 489 /// Define a 2D Global Average Pooling Node and add it to a Subgraph. 
490 /// 491 /// @param subgraph - a Subgraph object that will own the created Node. 492 /// @param output_min - lower bound for clipping output values. 493 /// @param output_max - upper bound for clipping output values. 494 /// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph 495 /// with [N, H, W, C] dimensions 496 /// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph 497 /// with [N, 1, 1, C] dimensions. 498 /// @param flags - binary features of the 2D Global Average Pooling Node. No supported flags are currently defined. 499 enum xnn_status xnn_define_global_average_pooling_2d( 500 xnn_subgraph_t subgraph, 501 float output_min, 502 float output_max, 503 uint32_t input_id, 504 uint32_t output_id, 505 uint32_t flags); 506 507 /// Define a 2D Average Pooling Node and add it to a Subgraph. 508 /// 509 /// @param subgraph - a Subgraph object that will own the created Node. 510 /// @param input_padding_top - implicit zero-padding above 2D input data. Must be 0 if XNN_FLAG_TENSORFLOW_SAME_PADDING 511 /// flag is specified. 512 /// @param input_padding_right - implicit zero-padding to the right of 2D input data. Must be 0 if 513 /// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified. 514 /// @param input_padding_bottom - implicit zero-padding below 2D input data. Must be 0 if 515 /// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified. 516 /// @param input_padding_left - implicit zero-padding to the left of 2D input data. Must be 0 if 517 /// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified. 518 /// @param pooling_height - pooling (kernel) height. 519 /// @param pooling_width - pooling (kernel) width. 520 /// @param stride_height - displacing of the pooling window in the vertical dimension of the input pixels corresponding 521 /// to vertically adjacent output pixels. 
522 /// @param stride_width - displacing of the pooling window in the horizontal dimension of the input pixels corresponding 523 /// to horizontally adjacent output pixels. 524 /// @param output_min - lower bound for clipping output values. 525 /// @param output_max - upper bound for clipping output values. 526 /// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph 527 /// with [N, IH, IW, channels] dimensions 528 /// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph 529 /// with [N, OH, OW, channels] dimensions. 530 /// @param flags - binary features of the 2D Average Pooling Node. The only currently supported values is 531 /// XNN_FLAG_TENSORFLOW_SAME_PADDING. 532 enum xnn_status xnn_define_average_pooling_2d( 533 xnn_subgraph_t subgraph, 534 uint32_t input_padding_top, 535 uint32_t input_padding_right, 536 uint32_t input_padding_bottom, 537 uint32_t input_padding_left, 538 uint32_t pooling_height, 539 uint32_t pooling_width, 540 uint32_t stride_height, 541 uint32_t stride_width, 542 float output_min, 543 float output_max, 544 uint32_t input_id, 545 uint32_t output_id, 546 uint32_t flags); 547 548 /// Define a Fully Connected Node and add it to a Subgraph. 549 /// 550 /// @param subgraph - a Subgraph object that will own the created Node. 551 /// @param output_min - lower bound for clipping output values. 552 /// @param output_max - upper bound for clipping output values. 553 /// @param input_id - Value ID for the input tensor. The input tensor must be an N-dimensional tensor defined in the 554 /// @a subgraph. If XNN_FLAG_TENSORFLOW_RESHAPE_2D is not specified, the input tensor must be at least 555 /// 1D and its last dimension must match the last dimension of the filter tensor. In particular, if 556 /// input is a 2D tensor, it must have [batch_size, input_channels] dimensions. 
///                   If XNN_FLAG_TENSORFLOW_RESHAPE_2D is specified, the number of elements in the input tensor must be
///                   divisible by the input_channels. The tensor will be first flattened into a 1D tensor of
///                   [num_input_elements] dimensions, then reshaped into a 2D tensor of
///                   [num_input_elements / input_channels, input_channels] dimensions where num_input_elements is the
///                   total number of elements in the input tensor.
/// @param filter_id - Value ID for the filter tensor. The filter tensor must be a 2D tensor defined in the @a subgraph.
///                    If the XNN_FLAG_TRANSPOSE_WEIGHTS flag is not specified, the filter tensor must have
///                    [output_channels, input_channels] dimensions. If the XNN_FLAG_TRANSPOSE_WEIGHTS flag is
///                    specified, the filter tensor must have [input_channels, output_channels] dimensions.
/// @param bias_id - Value ID for the bias tensor, or XNN_INVALID_VALUE_ID for a Fully Connected Node without a bias.
///                  If present, the bias tensor must be a 1D tensor defined in the @a subgraph with [output_channels]
///                  dimensions.
/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph.
///                    If XNN_FLAG_TENSORFLOW_RESHAPE_2D is not specified, the output tensor must have the same
///                    dimensionality as the input tensor, all its dimensions but the last one must match the
///                    corresponding dimensions of the input tensor, and the last dimension of the output tensor must
///                    match the first dimension of the filter tensor. In particular, if input is a 2D tensor, output
///                    must be a 2D tensor of [batch_size, output_channels] dimensions.
///                    If XNN_FLAG_TENSORFLOW_RESHAPE_2D is specified, output must be a 2D tensor of
///                    [num_input_elements / input_channels, output_channels] dimensions where num_input_elements is the
///                    total number of elements in the input tensor.
/// @param flags - binary features of the Fully Connected Node. The only currently supported values are
///                XNN_FLAG_TENSORFLOW_RESHAPE_2D and XNN_FLAG_TRANSPOSE_WEIGHTS.
enum xnn_status xnn_define_fully_connected(
  xnn_subgraph_t subgraph,
  float output_min,
  float output_max,
  uint32_t input_id,
  uint32_t filter_id,
  uint32_t bias_id,
  uint32_t output_id,
  uint32_t flags);

/// Define a 2D Max Pooling Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param input_padding_top - implicit zero-padding above 2D input data. Must be 0 if XNN_FLAG_TENSORFLOW_SAME_PADDING
///                            flag is specified.
/// @param input_padding_right - implicit zero-padding to the right of 2D input data. Must be 0 if
///                              XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
/// @param input_padding_bottom - implicit zero-padding below 2D input data. Must be 0 if
///                               XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
/// @param input_padding_left - implicit zero-padding to the left of 2D input data. Must be 0 if
///                             XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
/// @param pooling_height - pooling (kernel) height.
/// @param pooling_width - pooling (kernel) width.
/// @param stride_height - displacement of the pooling window in the vertical dimension of the input pixels
///                        corresponding to vertically adjacent output pixels.
/// @param stride_width - displacement of the pooling window in the horizontal dimension of the input pixels
///                       corresponding to horizontally adjacent output pixels.
/// @param dilation_height - dilation of pooling elements along the height dimension.
/// @param dilation_width - dilation of pooling elements along the width dimension.
/// @param output_min - lower bound for clipping output values.
/// @param output_max - upper bound for clipping output values.
/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
///                   with [N, IH, IW, channels] dimensions.
/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
///                    with [N, OH, OW, channels] dimensions.
/// @param flags - binary features of the 2D Max Pooling Node. The only currently supported value is
///                XNN_FLAG_TENSORFLOW_SAME_PADDING.
enum xnn_status xnn_define_max_pooling_2d(
  xnn_subgraph_t subgraph,
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t pooling_height,
  uint32_t pooling_width,
  uint32_t stride_height,
  uint32_t stride_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  float output_min,
  float output_max,
  uint32_t input_id,
  uint32_t output_id,
  uint32_t flags);

/// Define a 2D ArgMax Pooling Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param input_padding_top - implicit zero-padding above 2D input data.
/// @param input_padding_right - implicit zero-padding to the right of 2D input data.
/// @param input_padding_bottom - implicit zero-padding below 2D input data.
/// @param input_padding_left - implicit zero-padding to the left of 2D input data.
/// @param pooling_height - pooling (kernel) height. Vertical stride between pooling regions matches this value.
/// @param pooling_width - pooling (kernel) width. Horizontal stride between pooling regions matches this value.
/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
///                   with [N, IH, IW, channels] dimensions.
/// @param output_value_id - Value ID for the output tensor with the maximum values in the pools. The output tensor must
///                          be a 4D tensor defined in the @a subgraph with [N, OH, OW, channels] dimensions.
/// @param output_index_id - Value ID for the output tensor with the indexes of the maximum values in the pools. The
///                          output tensor must be a 4D tensor defined in the @a subgraph with [N, OH, OW, channels]
///                          dimensions.
/// @param flags - binary features of the 2D ArgMax Pooling Node. No supported flags are currently defined.
enum xnn_status xnn_define_argmax_pooling_2d(
  xnn_subgraph_t subgraph,
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t pooling_height,
  uint32_t pooling_width,
  uint32_t input_id,
  uint32_t output_value_id,
  uint32_t output_index_id,
  uint32_t flags);

/// Define a 2D UnPooling Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param padding_top - implicit padding above 2D output data.
/// @param padding_right - implicit padding to the right of 2D output data.
/// @param padding_bottom - implicit padding below 2D output data.
/// @param padding_left - implicit padding to the left of 2D output data.
/// @param pooling_height - height of the pooling window.
/// @param pooling_width - width of the pooling window.
/// @param input_value_id - Value ID for the input tensor with the max-pooling values to invert. The input value tensor
///                         must be a 4D tensor defined in the @a subgraph with [N, IH, IW, channels] dimensions.
/// @param input_index_id - Value ID for the input tensor with the indices of the per-pool maximum values produced by
///                         a 2D ArgMax Pooling Node. The input tensor must be a 4D tensor defined in the @a subgraph
///                         with [N, IH, IW, channels] dimensions.
/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
///                    with [N, OH, OW, channels] dimensions.
/// @param flags - binary features of the 2D UnPooling Node. No supported flags are currently defined.
enum xnn_status xnn_define_unpooling_2d(
  xnn_subgraph_t subgraph,
  uint32_t padding_top,
  uint32_t padding_right,
  uint32_t padding_bottom,
  uint32_t padding_left,
  uint32_t pooling_height,
  uint32_t pooling_width,
  uint32_t input_value_id,
  uint32_t input_index_id,
  uint32_t output_id,
  uint32_t flags);

/// Define a 2-Input Add Node and add it to a Subgraph.
///
/// The 2-Input Add Node computes elementwise addition of two tensor inputs with numpy broadcasting rules.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param output_min - lower bound for clipping output values.
/// @param output_max - upper bound for clipping output values.
/// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in
///                    the @a subgraph with each dimension either equal to the corresponding dimension of the second
///                    input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
///                    that dimension.
/// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in
///                    the @a subgraph with each dimension either equal to the corresponding dimension of the first
///                    input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
///                    that dimension.
/// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined
///                    in the @a subgraph with each dimension equal to the maximum between the corresponding dimension
///                    of the two inputs.
/// @param flags - binary features of the Add Node. No supported flags are currently defined.
enum xnn_status xnn_define_add2(
  xnn_subgraph_t subgraph,
  float output_min,
  float output_max,
  uint32_t input1_id,
  uint32_t input2_id,
  uint32_t output_id,
  uint32_t flags);

/// Define a 2-Input Multiply Node and add it to a Subgraph.
///
/// The 2-Input Multiply Node computes elementwise multiplication of two tensor inputs with numpy broadcasting rules.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param output_min - lower bound for clipping output values.
/// @param output_max - upper bound for clipping output values.
/// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in
///                    the @a subgraph with each dimension either equal to the corresponding dimension of the second
///                    input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
///                    that dimension.
/// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in
///                    the @a subgraph with each dimension either equal to the corresponding dimension of the first
///                    input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
///                    that dimension.
/// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined
///                    in the @a subgraph with each dimension equal to the maximum between the corresponding dimension
///                    of the two inputs.
/// @param flags - binary features of the Multiply Node. No supported flags are currently defined.
enum xnn_status xnn_define_multiply2(
  xnn_subgraph_t subgraph,
  float output_min,
  float output_max,
  uint32_t input1_id,
  uint32_t input2_id,
  uint32_t output_id,
  uint32_t flags);

/// Define a Subtract Node and add it to a Subgraph.
///
/// The Subtract Node computes elementwise subtraction of two tensor inputs with numpy broadcasting rules.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param output_min - lower bound for clipping output values.
/// @param output_max - upper bound for clipping output values.
/// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in
///                    the @a subgraph with each dimension either equal to the corresponding dimension of the second
///                    input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
///                    that dimension.
/// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in
///                    the @a subgraph with each dimension either equal to the corresponding dimension of the first
///                    input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
///                    that dimension.
/// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined
///                    in the @a subgraph with each dimension equal to the maximum between the corresponding dimension
///                    of the two inputs.
/// @param flags - binary features of the Subtract Node. No supported flags are currently defined.
enum xnn_status xnn_define_subtract(
  xnn_subgraph_t subgraph,
  float output_min,
  float output_max,
  uint32_t input1_id,
  uint32_t input2_id,
  uint32_t output_id,
  uint32_t flags);

/// Define a Divide Node and add it to a Subgraph.
///
/// The Divide Node computes elementwise division of two tensor inputs with numpy broadcasting rules.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param output_min - lower bound for clipping output values.
/// @param output_max - upper bound for clipping output values.
/// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in
///                    the @a subgraph with each dimension either equal to the corresponding dimension of the second
///                    input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
///                    that dimension.
/// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in
///                    the @a subgraph with each dimension either equal to the corresponding dimension of the first
///                    input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
///                    that dimension.
/// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined
///                    in the @a subgraph with each dimension equal to the maximum between the corresponding dimension
///                    of the two inputs.
/// @param flags - binary features of the Divide Node. No supported flags are currently defined.
enum xnn_status xnn_define_divide(
  xnn_subgraph_t subgraph,
  float output_min,
  float output_max,
  uint32_t input1_id,
  uint32_t input2_id,
  uint32_t output_id,
  uint32_t flags);

/// Define a 2-Input Maximum Node and add it to a Subgraph.
///
/// The 2-Input Maximum Node computes elementwise maximum of two tensor inputs with numpy broadcasting rules.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in
///                    the @a subgraph with each dimension either equal to the corresponding dimension of the second
///                    input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
///                    that dimension.
/// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in
///                    the @a subgraph with each dimension either equal to the corresponding dimension of the first
///                    input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
///                    that dimension.
/// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined
///                    in the @a subgraph with each dimension equal to the maximum between the corresponding dimension
///                    of the two inputs.
/// @param flags - binary features of the Maximum Node. No supported flags are currently defined.
enum xnn_status xnn_define_maximum2(
  xnn_subgraph_t subgraph,
  uint32_t input1_id,
  uint32_t input2_id,
  uint32_t output_id,
  uint32_t flags);

/// Define a 2-Input Minimum Node and add it to a Subgraph.
///
/// The 2-Input Minimum Node computes elementwise minimum of two tensor inputs with numpy broadcasting rules.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in
///                    the @a subgraph with each dimension either equal to the corresponding dimension of the second
///                    input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
///                    that dimension.
/// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in
///                    the @a subgraph with each dimension either equal to the corresponding dimension of the first
///                    input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
///                    that dimension.
/// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined
///                    in the @a subgraph with each dimension equal to the maximum between the corresponding dimension
///                    of the two inputs.
/// @param flags - binary features of the Minimum Node. No supported flags are currently defined.
enum xnn_status xnn_define_minimum2(
  xnn_subgraph_t subgraph,
  uint32_t input1_id,
  uint32_t input2_id,
  uint32_t output_id,
  uint32_t flags);

/// Define a Squared Difference Node and add it to a Subgraph.
///
/// The Squared Difference Node computes elementwise squared difference of two tensor inputs with numpy broadcasting
/// rules.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in
///                    the @a subgraph with each dimension either equal to the corresponding dimension of the second
///                    input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
///                    that dimension.
/// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in
///                    the @a subgraph with each dimension either equal to the corresponding dimension of the first
///                    input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
///                    that dimension.
/// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined
///                    in the @a subgraph with each dimension equal to the maximum between the corresponding dimension
///                    of the two inputs.
/// @param flags - binary features of the Squared Difference Node. No supported flags are currently defined.
enum xnn_status xnn_define_squared_difference(
  xnn_subgraph_t subgraph,
  uint32_t input1_id,
  uint32_t input2_id,
  uint32_t output_id,
  uint32_t flags);

/// Define a Constant Pad Node with static padding specification and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param pre_paddings - number of padding elements to insert before input elements for every dimension. This array
///                       must have as many elements as the number of dimensions in the input tensor.
/// @param post_paddings - number of padding elements to insert after input elements for every dimension. This array
///                        must have as many elements as the number of dimensions in the input tensor.
/// @param padding_value - constant value used to initialize padding elements.
/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
///                    shape must match the shape of the input tensor with padding.
/// @param flags - binary features of the Constant Pad Node. No supported flags are currently defined.
enum xnn_status xnn_define_static_constant_pad(
  xnn_subgraph_t subgraph,
  const size_t* pre_paddings,
  const size_t* post_paddings,
  float padding_value,
  uint32_t input_id,
  uint32_t output_id,
  uint32_t flags);

/// Define a Reshape Node with static shape specification and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param num_dims - number of shape dimensions in the output tensor.
/// @param new_shape - shape dimensions of the output tensor.
/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
///                    shape must match the [num_dims] dimensions specified in @a new_shape.
/// @param flags - binary features of the Reshape Node. No supported flags are currently defined.
enum xnn_status xnn_define_static_reshape(
  xnn_subgraph_t subgraph,
  size_t num_dims,
  const size_t* new_shape,
  uint32_t input_id,
  uint32_t output_id,
  uint32_t flags);

/// Define a 2D Resize Bilinear Node with static output height & width specification and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param new_height - height dimension of the output tensor.
/// @param new_width - width dimension of the output tensor.
/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
///                   with [N, H, W, C] dimensions.
/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
///                    with [N, new_height, new_width, C] dimensions.
/// @param flags - binary features of the 2D Resize Bilinear Node. The only currently supported values are
///                XNN_FLAG_TENSORFLOW_LEGACY_MODE and XNN_FLAG_ALIGN_CORNERS, which are mutually exclusive.
enum xnn_status xnn_define_static_resize_bilinear_2d(
  xnn_subgraph_t subgraph,
  size_t new_height,
  size_t new_width,
  uint32_t input_id,
  uint32_t output_id,
  uint32_t flags);

/// Define a PReLU (Parametric ReLU) Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
///                   with [N, H, W, channels] dimensions.
/// @param slope_id - Value ID for the slope tensor. The slope tensor must be a 1D tensor defined in the @a subgraph
///                   with [channels] dimensions.
/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
///                    with [N, H, W, channels] dimensions.
/// @param flags - binary features of the PReLU Node. No supported flags are currently defined.
enum xnn_status xnn_define_prelu(
  xnn_subgraph_t subgraph,
  uint32_t input_id,
  uint32_t slope_id,
  uint32_t output_id,
  uint32_t flags);

/// Define an Abs Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
///                    shape must match the shape of the input tensor.
/// @param flags - binary features of the Abs Node. No supported flags are currently defined.
enum xnn_status xnn_define_abs(
  xnn_subgraph_t subgraph,
  uint32_t input_id,
  uint32_t output_id,
  uint32_t flags);

/// Define a Bankers' Rounding Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
///                    shape must match the shape of the input tensor.
/// @param flags - binary features of the Bankers' Rounding Node. No supported flags are currently defined.
enum xnn_status xnn_define_bankers_rounding(
  xnn_subgraph_t subgraph,
  uint32_t input_id,
  uint32_t output_id,
  uint32_t flags);

/// Define a Ceiling Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
///                    shape must match the shape of the input tensor.
/// @param flags - binary features of the Ceiling Node. No supported flags are currently defined.
enum xnn_status xnn_define_ceiling(
  xnn_subgraph_t subgraph,
  uint32_t input_id,
  uint32_t output_id,
  uint32_t flags);

/// Define a Clamp Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param output_min - lower bound for clipping output values.
/// @param output_max - upper bound for clipping output values.
/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
///                    shape must match the shape of the input tensor.
/// @param flags - binary features of the Clamp Node. No supported flags are currently defined.
enum xnn_status xnn_define_clamp(
  xnn_subgraph_t subgraph,
  float output_min,
  float output_max,
  uint32_t input_id,
  uint32_t output_id,
  uint32_t flags);

/// Define an ELU (Exponential Linear Unit) Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param alpha - scale factor for negative output elements.
/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
///                    shape must match the shape of the input tensor.
/// @param flags - binary features of the ELU Node. No supported flags are currently defined.
enum xnn_status xnn_define_elu(
  xnn_subgraph_t subgraph,
  float alpha,
  uint32_t input_id,
  uint32_t output_id,
  uint32_t flags);

/// Define a Floor Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
///                    shape must match the shape of the input tensor.
/// @param flags - binary features of the Floor Node. No supported flags are currently defined.
enum xnn_status xnn_define_floor(
  xnn_subgraph_t subgraph,
  uint32_t input_id,
  uint32_t output_id,
  uint32_t flags);

/// Define a HardSwish Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
///                    shape must match the shape of the input tensor.
/// @param flags - binary features of the HardSwish Node. No supported flags are currently defined.
enum xnn_status xnn_define_hardswish(
  xnn_subgraph_t subgraph,
  uint32_t input_id,
  uint32_t output_id,
  uint32_t flags);

/// Define a Leaky ReLU Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param negative_slope - scale factor for negative input elements.
/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
///                    shape must match the shape of the input tensor.
/// @param flags - binary features of the Leaky ReLU Node. No supported flags are currently defined.
enum xnn_status xnn_define_leaky_relu(
  xnn_subgraph_t subgraph,
  float negative_slope,
  uint32_t input_id,
  uint32_t output_id,
  uint32_t flags);

/// Define a Negate Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
///                    shape must match the shape of the input tensor.
/// @param flags - binary features of the Negate Node. No supported flags are currently defined.
enum xnn_status xnn_define_negate(
  xnn_subgraph_t subgraph,
  uint32_t input_id,
  uint32_t output_id,
  uint32_t flags);

/// Define a Sigmoid Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
///                    shape must match the shape of the input tensor.
/// @param flags - binary features of the Sigmoid Node. No supported flags are currently defined.
enum xnn_status xnn_define_sigmoid(
  xnn_subgraph_t subgraph,
  uint32_t input_id,
  uint32_t output_id,
  uint32_t flags);

/// Define a SoftMax Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph, and have at
///                   least one dimension.
/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
///                    shape must match the shape of the input tensor.
/// @param flags - binary features of the SoftMax Node. No supported flags are currently defined.
enum xnn_status xnn_define_softmax(
  xnn_subgraph_t subgraph,
  uint32_t input_id,
  uint32_t output_id,
  uint32_t flags);

/// Define a Square Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
///                    shape must match the shape of the input tensor.
/// @param flags - binary features of the Square Node. No supported flags are currently defined.
enum xnn_status xnn_define_square(
  xnn_subgraph_t subgraph,
  uint32_t input_id,
  uint32_t output_id,
  uint32_t flags);

/// Define a Square Root Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
///                    shape must match the shape of the input tensor.
/// @param flags - binary features of the Square Root Node. No supported flags are currently defined.
1126 enum xnn_status xnn_define_square_root( 1127 xnn_subgraph_t subgraph, 1128 uint32_t input_id, 1129 uint32_t output_id, 1130 uint32_t flags); 1131 1132 /// Runtime is a combination of an execution plan for subgraph Nodes and a memory manager for subgraph Values. 1133 typedef struct xnn_runtime* xnn_runtime_t; 1134 1135 /// Create a Runtime object from a subgraph. 1136 /// 1137 /// @param subgraph - a Subgraph object with all Values and Nodes that would be handled by the runtime. No Values or 1138 /// Nodes can be added to the runtime once it is constructed. 1139 /// @param threadpool - the thread pool to be used for parallelisation of computations in the runtime. If the thread 1140 /// pool is NULL, the computation would run on the caller thread without parallelization. 1141 /// @param flags - binary features of the runtime. The only currently supported values are XNN_FLAG_SPARSE_INFERENCE, 1142 /// XNN_FLAG_FP16_INFERENCE, and XNN_FLAG_YIELD_WORKERS. If XNN_FLAG_YIELD_WORKERS is specified, worker 1143 /// threads would be yielded to the system scheduler after processing the last operator in the Runtime. 1144 /// @param runtime_out - pointer to the variable that will be initialized with a handle to the Runtime object upon 1145 /// successful return. Once constructed, the Runtime object is independent of the Subgraph object 1146 /// used to create it. 1147 enum xnn_status xnn_create_runtime_v2( 1148 xnn_subgraph_t subgraph, 1149 pthreadpool_t threadpool, 1150 uint32_t flags, 1151 xnn_runtime_t* runtime_out); 1152 1153 enum xnn_status xnn_create_runtime( 1154 xnn_subgraph_t subgraph, 1155 xnn_runtime_t* runtime_out); 1156 1157 struct xnn_external_value { 1158 uint32_t id; 1159 void* data; 1160 }; 1161 1162 /// Setup data pointers for external inputs and outputs in a Runtime object. 1163 /// 1164 /// @param runtime - a Runtime object created with @ref xnn_create_runtime or @ref xnn_create_runtime_v2. 
/// @param num_external_values - the number of external inputs and outputs specified in this call. This number must
///                              match the number of external inputs and outputs in the runtime, i.e. all external
///                              inputs and outputs in the runtime must be specified in one call.
/// @param external_values - array with location information for all external inputs and outputs in the runtime.
enum xnn_status xnn_setup_runtime(
  xnn_runtime_t runtime,
  size_t num_external_values,
  const struct xnn_external_value* external_values);

/// Execute forward pass for all operators in the runtime.
///
/// @param runtime - the Runtime object with the execution plan to invoke.
enum xnn_status xnn_invoke_runtime(
  xnn_runtime_t runtime);

/// Destroy a Runtime object, as well as operators and memory associated with it.
///
/// @param runtime - the Runtime object to destroy.
enum xnn_status xnn_delete_runtime(
  xnn_runtime_t runtime);

/// Opaque handle to an XNNPACK operator object.
typedef struct xnn_operator* xnn_operator_t;

/// Run a previously set up operator, parallelizing the computation on the given thread pool.
enum xnn_status xnn_run_operator(
  xnn_operator_t op,
  pthreadpool_t threadpool);

/// Destroy an operator object and release the memory associated with it.
enum xnn_status xnn_delete_operator(
  xnn_operator_t op);

#ifndef XNN_NO_F32_OPERATORS

/// Create an Abs (absolute value) operator for FP32 tensors in NC layout.
enum xnn_status xnn_create_abs_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* abs_op_out);

/// Setup an Abs operator with data pointers for the given batch size.
enum xnn_status xnn_setup_abs_nc_f32(
  xnn_operator_t abs_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create an N-dimensional Add operator for FP32 tensors; input shapes are supplied at setup time.
enum xnn_status xnn_create_add_nd_f32(
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* add_op_out);

/// Setup an Add operator with the two input shapes and data pointers.
enum xnn_status xnn_setup_add_nd_f32(
  xnn_operator_t add_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const
  float* input1,
  const float* input2,
  float* output,
  pthreadpool_t threadpool);

/// Create a 2D ArgMax Pooling operator for FP32 tensors in NHWC layout.
enum xnn_status xnn_create_argmax_pooling2d_nhwc_f32(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t pooling_height,
  uint32_t pooling_width,
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  uint32_t flags,
  xnn_operator_t* argmax_pooling_op_out);

/// Setup an ArgMax Pooling operator; @a index receives a uint32_t per output element alongside the pooled values.
enum xnn_status xnn_setup_argmax_pooling2d_nhwc_f32(
  xnn_operator_t argmax_pooling_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const float* input,
  float* output,
  uint32_t* index,
  pthreadpool_t threadpool);

/// Create a 2D Average Pooling operator for FP32 tensors in NHWC layout.
enum xnn_status xnn_create_average_pooling2d_nhwc_f32(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t pooling_height,
  uint32_t pooling_width,
  uint32_t stride_height,
  uint32_t stride_width,
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* average_pooling_op_out);

enum xnn_status xnn_setup_average_pooling2d_nhwc_f32(
  xnn_operator_t average_pooling_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a Bankers' Rounding operator for FP32 tensors in NC layout.
enum xnn_status xnn_create_bankers_rounding_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* rounding_op_out);

enum xnn_status xnn_setup_bankers_rounding_nc_f32(
  xnn_operator_t rounding_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a Ceiling operator for FP32 tensors in NC layout.
enum xnn_status xnn_create_ceiling_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* ceiling_op_out);

enum xnn_status xnn_setup_ceiling_nc_f32(
  xnn_operator_t ceiling_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a Clamp operator that limits FP32 values to [output_min, output_max], NC layout.
enum xnn_status xnn_create_clamp_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* clamp_op_out);

enum xnn_status xnn_setup_clamp_nc_f32(
  xnn_operator_t clamp_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a 2D Convolution operator for FP32 tensors in NHWC layout.
enum xnn_status xnn_create_convolution2d_nhwc_f32(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t kernel_height,
  uint32_t kernel_width,
  uint32_t subsampling_height,
  uint32_t subsampling_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  uint32_t groups,
  size_t group_input_channels,
  size_t group_output_channels,
  size_t input_channel_stride,
  size_t output_channel_stride,
  const float* kernel,
  const float* bias,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* convolution_op_out);

enum xnn_status xnn_setup_convolution2d_nhwc_f32(
  xnn_operator_t convolution_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a 2D Deconvolution (transposed convolution) operator for FP32 tensors in NHWC layout.
enum xnn_status xnn_create_deconvolution2d_nhwc_f32(
  uint32_t output_padding_top,
  uint32_t output_padding_right,
  uint32_t output_padding_bottom,
  uint32_t output_padding_left,
  uint32_t kernel_height,
  uint32_t kernel_width,
  uint32_t stride_height,
  uint32_t stride_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  uint32_t groups,
  size_t group_input_channels,
  size_t group_output_channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  const float* kernel,
  const float* bias,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* deconvolution_op_out);

/// Setup a Deconvolution operator; adjustment_height/width extend the output size.
enum xnn_status xnn_setup_deconvolution2d_nhwc_f32(
  xnn_operator_t deconvolution_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  uint32_t adjustment_height,
  uint32_t adjustment_width,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create an N-dimensional Divide operator for FP32 tensors; input shapes are supplied at setup time.
enum xnn_status xnn_create_divide_nd_f32(
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* divide_op_out);

enum xnn_status xnn_setup_divide_nd_f32(
  xnn_operator_t divide_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const float* input1,
  const float* input2,
  float* output,
  pthreadpool_t threadpool);

/// Create an ELU operator with slope parameter @a alpha for FP32 tensors in NC layout.
enum xnn_status xnn_create_elu_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  float alpha,
  uint32_t flags,
  xnn_operator_t* elu_op_out);

enum xnn_status xnn_setup_elu_nc_f32(
  xnn_operator_t elu_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a Floor operator for FP32 tensors in NC layout.
enum xnn_status xnn_create_floor_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* floor_op_out);

enum xnn_status xnn_setup_floor_nc_f32(
  xnn_operator_t floor_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a Fully Connected operator for FP32 tensors in NC layout.
enum xnn_status xnn_create_fully_connected_nc_f32(
  size_t input_channels,
  size_t output_channels,
  size_t input_stride,
  size_t output_stride,
  const float* kernel,
  const float* bias,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* fully_connected_op_out);

enum xnn_status xnn_setup_fully_connected_nc_f32(
  xnn_operator_t fully_connected_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a Global Average Pooling operator for FP32 tensors in NWC layout.
enum xnn_status xnn_create_global_average_pooling_nwc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* global_average_pooling_op_out);

enum xnn_status xnn_setup_global_average_pooling_nwc_f32(
  xnn_operator_t global_average_pooling_op,
  size_t batch_size,
  size_t width,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a HardSwish operator for FP32 tensors in NC layout.
enum xnn_status xnn_create_hardswish_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* hardswish_op_out);

enum xnn_status xnn_setup_hardswish_nc_f32(
  xnn_operator_t hardswish_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a Leaky ReLU operator with the given negative slope for FP32 tensors in NC layout.
enum xnn_status xnn_create_leaky_relu_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  float negative_slope,
  uint32_t flags,
  xnn_operator_t* leaky_relu_op_out);

enum xnn_status xnn_setup_leaky_relu_nc_f32(
  xnn_operator_t leaky_relu_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a 2D Max Pooling operator for FP32 tensors in NHWC layout.
enum xnn_status xnn_create_max_pooling2d_nhwc_f32(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t pooling_height,
  uint32_t
  pooling_width,
  uint32_t stride_height,
  uint32_t stride_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* max_pooling_op_out);

enum xnn_status xnn_setup_max_pooling2d_nhwc_f32(
  xnn_operator_t max_pooling_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create an N-dimensional Maximum operator for FP32 tensors; input shapes are supplied at setup time.
enum xnn_status xnn_create_maximum_nd_f32(
  uint32_t flags,
  xnn_operator_t* maximum_op_out);

enum xnn_status xnn_setup_maximum_nd_f32(
  xnn_operator_t maximum_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const float* input1,
  const float* input2,
  float* output,
  pthreadpool_t threadpool);

/// Create an N-dimensional Minimum operator for FP32 tensors; input shapes are supplied at setup time.
enum xnn_status xnn_create_minimum_nd_f32(
  uint32_t flags,
  xnn_operator_t* minimum_op_out);

enum xnn_status xnn_setup_minimum_nd_f32(
  xnn_operator_t minimum_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const float* input1,
  const float* input2,
  float* output,
  pthreadpool_t threadpool);

/// Create an N-dimensional Multiply operator for FP32 tensors; input shapes are supplied at setup time.
enum xnn_status xnn_create_multiply_nd_f32(
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* multiply_op_out);

enum xnn_status xnn_setup_multiply_nd_f32(
  xnn_operator_t multiply_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const float* input1,
  const float* input2,
  float* output,
  pthreadpool_t threadpool);

/// Create a Negate operator for FP32 tensors in NC layout.
enum xnn_status xnn_create_negate_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* negate_op_out);

enum xnn_status xnn_setup_negate_nc_f32(
  xnn_operator_t negate_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a PReLU operator with per-channel negative slopes for FP32 tensors in NC layout.
enum xnn_status xnn_create_prelu_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  const float* negative_slope,
  uint32_t flags,
  xnn_operator_t* prelu_op_out);

enum xnn_status xnn_setup_prelu_nc_f32(
  xnn_operator_t prelu_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a 2D Bilinear Resize operator for FP32 tensors in NCHW layout.
enum xnn_status xnn_create_resize_bilinear2d_nchw_f32(
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  uint32_t flags,
  xnn_operator_t* resize_op_out);

enum xnn_status xnn_setup_resize_bilinear2d_nchw_f32(
  xnn_operator_t resize_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  size_t output_height,
  size_t output_width,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a 2D Bilinear Resize operator for FP32 tensors in NHWC layout.
enum xnn_status xnn_create_resize_bilinear2d_nhwc_f32(
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  uint32_t flags,
  xnn_operator_t* resize_op_out);

enum xnn_status xnn_setup_resize_bilinear2d_nhwc_f32(
  xnn_operator_t resize_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  size_t output_height,
  size_t output_width,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a Sigmoid operator for FP32 tensors in NC layout.
enum xnn_status xnn_create_sigmoid_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* sigmoid_op_out);

enum xnn_status xnn_setup_sigmoid_nc_f32(
  xnn_operator_t sigmoid_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a SoftMax operator for FP32 tensors in NC layout.
enum xnn_status xnn_create_softmax_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* softmax_op_out);

enum xnn_status xnn_setup_softmax_nc_f32(
  xnn_operator_t softmax_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a Square operator for FP32 tensors in NC layout.
enum xnn_status xnn_create_square_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* square_op_out);

enum xnn_status xnn_setup_square_nc_f32(
  xnn_operator_t square_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a Square Root operator for FP32 tensors in NC layout.
enum xnn_status xnn_create_square_root_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* sqrt_op_out);

enum xnn_status xnn_setup_square_root_nc_f32(
  xnn_operator_t sqrt_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create an N-dimensional Squared Difference operator for FP32 tensors; input shapes are supplied at setup time.
enum xnn_status xnn_create_squared_difference_nd_f32(
  uint32_t flags,
  xnn_operator_t* squared_difference_op_out);

enum xnn_status xnn_setup_squared_difference_nd_f32(
  xnn_operator_t squared_difference_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const float* input1,
  const float* input2,
  float* output,
  pthreadpool_t threadpool);

/// Create an N-dimensional Subtract operator for FP32 tensors; input shapes are supplied at setup time.
enum xnn_status xnn_create_subtract_nd_f32(
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* subtract_op_out);

enum xnn_status xnn_setup_subtract_nd_f32(
  xnn_operator_t subtract_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const float* input1,
  const float* input2,
  float* output,
  pthreadpool_t threadpool);

/// Create a Truncation (round toward zero) operator for FP32 tensors in NC layout.
enum xnn_status xnn_create_truncation_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* truncation_op_out);

enum xnn_status xnn_setup_truncation_nc_f32(
  xnn_operator_t truncation_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

#ifndef XNN_NO_NCHW_OPERATORS

/// Create a 2D Convolution operator for FP32 tensors in NCHW layout.
enum xnn_status xnn_create_convolution2d_nchw_f32(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t kernel_height,
  uint32_t kernel_width,
  uint32_t subsampling_height,
  uint32_t subsampling_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  uint32_t groups,
  size_t group_input_channels,
  size_t group_output_channels,
  size_t input_channel_stride,
  size_t output_channel_stride,
  const float* kernel,
  const float* bias,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* convolution_op_out);

enum xnn_status xnn_setup_convolution2d_nchw_f32(
  xnn_operator_t convolution_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a Global Average Pooling operator for FP32 tensors in NCW layout.
enum xnn_status xnn_create_global_average_pooling_ncw_f32(
  size_t channels,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* global_average_pooling_op_out);

enum xnn_status xnn_setup_global_average_pooling_ncw_f32(
  xnn_operator_t global_average_pooling_op,
  size_t batch_size,
  size_t width,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

#endif  // XNN_NO_NCHW_OPERATORS

#endif
// XNN_NO_F32_OPERATORS

#ifndef XNN_NO_X32_OPERATORS

/// Create a Channel Shuffle operator for 32-bit elements in NC layout (datatype-agnostic).
enum xnn_status xnn_create_channel_shuffle_nc_x32(
  size_t groups,
  size_t group_channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* channel_shuffle_op_out);

enum xnn_status xnn_setup_channel_shuffle_nc_x32(
  xnn_operator_t channel_shuffle_op,
  size_t batch_size,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// Create an N-dimensional Constant Pad operator for 32-bit elements.
enum xnn_status xnn_create_constant_pad_nd_x32(
  const void* padding_value,
  uint32_t flags,
  xnn_operator_t* constant_pad_op_out);

enum xnn_status xnn_setup_constant_pad_nd_x32(
  xnn_operator_t constant_pad_op,
  size_t num_dims,
  const size_t* input_shape,
  const size_t* pre_padding,
  const size_t* post_padding,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// Create a Copy operator for 32-bit elements in NC layout.
enum xnn_status xnn_create_copy_nc_x32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* copy_op_out);

enum xnn_status xnn_setup_copy_nc_x32(
  xnn_operator_t copy_op,
  size_t batch_size,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// Create a Depth-to-Space operator for 32-bit elements in NHWC layout.
enum xnn_status xnn_create_depth_to_space_nhwc_x32(
  size_t output_channels,
  size_t input_channel_stride,
  size_t output_channel_stride,
  uint32_t block_size,
  uint32_t flags,
  xnn_operator_t* depth_to_space_op_out);

enum xnn_status xnn_setup_depth_to_space_nhwc_x32(
  xnn_operator_t depth_to_space_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// Create a Depth-to-Space operator for 32-bit elements with NCHW input and NHWC output.
enum xnn_status xnn_create_depth_to_space_nchw2nhwc_x32(
  size_t output_channels,
  size_t input_channel_stride,
  size_t output_channel_stride,
  uint32_t block_size,
  uint32_t flags,
  xnn_operator_t* depth_to_space_op_out);

enum xnn_status xnn_setup_depth_to_space_nchw2nhwc_x32(
  xnn_operator_t depth_to_space_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// Create a 2D Unpooling operator for 32-bit elements in NHWC layout.
enum xnn_status xnn_create_unpooling2d_nhwc_x32(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t pooling_height,
  uint32_t pooling_width,
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  uint32_t flags,
  xnn_operator_t* unpooling_op_out);

/// Setup an Unpooling operator; @a index supplies the per-element positions to scatter values to.
enum xnn_status xnn_setup_unpooling2d_nhwc_x32(
  xnn_operator_t unpooling_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const void* input,
  const uint32_t* index,
  void* output,
  pthreadpool_t threadpool);

#endif  // XNN_NO_X32_OPERATORS

#ifndef XNN_NO_F16_OPERATORS

/// Create an N-dimensional Add operator for FP16 tensors; input shapes are supplied at setup time.
enum xnn_status xnn_create_add_nd_f16(
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* add_op_out);

enum xnn_status xnn_setup_add_nd_f16(
  xnn_operator_t add_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const void* input1,
  const void* input2,
  void* output,
  pthreadpool_t threadpool);

/// Create a 2D Convolution operator for FP16 tensors in NHWC layout.
enum xnn_status xnn_create_convolution2d_nhwc_f16(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t kernel_height,
  uint32_t kernel_width,
  uint32_t subsampling_height,
  uint32_t subsampling_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  uint32_t groups,
  size_t group_input_channels,
  size_t group_output_channels,
  size_t input_channel_stride,
  size_t output_channel_stride,
  const void* kernel,
  const void* bias,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* convolution_op_out);

enum xnn_status xnn_setup_convolution2d_nhwc_f16(
  xnn_operator_t convolution_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// Create a Fully Connected operator for FP16 tensors in NC layout.
enum xnn_status xnn_create_fully_connected_nc_f16(
  size_t input_channels,
  size_t output_channels,
  size_t input_stride,
  size_t output_stride,
  const void* kernel,
  const void* bias,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* fully_connected_op_out);

enum xnn_status xnn_setup_fully_connected_nc_f16(
  xnn_operator_t fully_connected_op,
  size_t batch_size,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// Create a Global Average Pooling operator for FP16 tensors in NWC layout.
enum xnn_status xnn_create_global_average_pooling_nwc_f16(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* global_average_pooling_op_out);

enum xnn_status xnn_setup_global_average_pooling_nwc_f16(
  xnn_operator_t global_average_pooling_op,
  size_t batch_size,
  size_t width,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// Create a HardSwish operator for FP16 tensors in NC layout.
enum xnn_status xnn_create_hardswish_nc_f16(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* hardswish_op_out);

enum xnn_status xnn_setup_hardswish_nc_f16(
  xnn_operator_t hardswish_op,
  size_t batch_size,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// Create a 2D Max Pooling operator for FP16 tensors in NHWC layout.
enum xnn_status xnn_create_max_pooling2d_nhwc_f16(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t pooling_height,
  uint32_t pooling_width,
  uint32_t stride_height,
  uint32_t stride_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* max_pooling_op_out);

enum xnn_status xnn_setup_max_pooling2d_nhwc_f16(
  xnn_operator_t max_pooling_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// Create an N-dimensional Multiply operator for FP16 tensors; input shapes are supplied at setup time.
enum xnn_status xnn_create_multiply_nd_f16(
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* multiply_op_out);

enum xnn_status xnn_setup_multiply_nd_f16(
  xnn_operator_t multiply_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const void* input1,
  const void* input2,
  void* output,
  pthreadpool_t threadpool);

/// Create a PReLU operator with per-channel negative slopes for FP16 tensors in NC layout.
enum xnn_status xnn_create_prelu_nc_f16(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  const void* negative_slope,
  uint32_t flags,
  xnn_operator_t* prelu_op_out);

enum xnn_status xnn_setup_prelu_nc_f16(
  xnn_operator_t prelu_op,
  size_t batch_size,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

#endif  // XNN_NO_F16_OPERATORS

#ifndef XNN_NO_X16_OPERATORS

/// Create an N-dimensional Constant Pad operator for 16-bit elements.
enum xnn_status xnn_create_constant_pad_nd_x16(
  const void* padding_value,
  uint32_t flags,
  xnn_operator_t* constant_pad_op_out);

enum xnn_status xnn_setup_constant_pad_nd_x16(
  xnn_operator_t constant_pad_op,
  size_t num_dims,
  const size_t* input_shape,
  const size_t* pre_padding,
  const size_t* post_padding,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// Create a Copy operator for 16-bit elements in NC layout.
enum xnn_status xnn_create_copy_nc_x16(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* copy_op_out);

enum xnn_status xnn_setup_copy_nc_x16(
  xnn_operator_t copy_op,
  size_t batch_size,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

#endif  // XNN_NO_X16_OPERATORS

#ifndef XNN_NO_QC8_OPERATORS

/// Create a 2D Convolution operator for per-channel-quantized signed 8-bit tensors in NHWC layout.
/// @a kernel_scale points to one scale value per output channel.
enum xnn_status xnn_create_convolution2d_nhwc_qc8(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t kernel_height,
  uint32_t kernel_width,
  uint32_t subsampling_height,
  uint32_t subsampling_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  uint32_t groups,
  size_t group_input_channels,
  size_t group_output_channels,
  size_t input_channel_stride,
  size_t output_channel_stride,
  int8_t input_zero_point,
  float input_scale,
  const float* kernel_scale,
  const int8_t* kernel,
  const int32_t* bias,
  int8_t output_zero_point,
  float output_scale,
  int8_t output_min,
  int8_t output_max,
  uint32_t flags,
  xnn_operator_t* convolution_op_out);

enum xnn_status xnn_setup_convolution2d_nhwc_qc8(
  xnn_operator_t convolution_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const int8_t* input,
  int8_t* output,
  pthreadpool_t threadpool);

#endif  // XNN_NO_QC8_OPERATORS

#ifndef XNN_NO_QS8_OPERATORS

/// Create an N-dimensional Add operator for quantized signed 8-bit tensors; input shapes are supplied at setup time.
enum xnn_status xnn_create_add_nd_qs8(
  int8_t input1_zero_point,
  float input1_scale,
  int8_t input2_zero_point,
  float input2_scale,
  int8_t output_zero_point,
  float output_scale,
  int8_t output_min,
  int8_t output_max,
  uint32_t flags,
  xnn_operator_t* add_op_out);

enum xnn_status xnn_setup_add_nd_qs8(
  xnn_operator_t add_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const int8_t* input1,
  const int8_t* input2,
  int8_t* output,
  pthreadpool_t threadpool);

/// Create a 2D Convolution operator for quantized signed 8-bit tensors in NHWC layout (single kernel scale).
enum xnn_status xnn_create_convolution2d_nhwc_qs8(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t kernel_height,
  uint32_t kernel_width,
  uint32_t subsampling_height,
  uint32_t subsampling_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  uint32_t groups,
  size_t group_input_channels,
  size_t group_output_channels,
  size_t input_channel_stride,
  size_t output_channel_stride,
  int8_t input_zero_point,
  float input_scale,
  float kernel_scale,
  const int8_t* kernel,
  const int32_t* bias,
  int8_t output_zero_point,
  float output_scale,
  int8_t output_min,
  int8_t output_max,
  uint32_t flags,
  xnn_operator_t* convolution_op_out);

enum xnn_status xnn_setup_convolution2d_nhwc_qs8(
  xnn_operator_t convolution_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const int8_t* input,
  int8_t* output,
  pthreadpool_t threadpool);

/// Create a 2D Deconvolution (transposed convolution) operator for quantized signed 8-bit tensors in NHWC layout.
enum xnn_status xnn_create_deconvolution2d_nhwc_qs8(
  uint32_t output_padding_top,
  uint32_t output_padding_right,
  uint32_t output_padding_bottom,
  uint32_t output_padding_left,
  uint32_t kernel_height,
  uint32_t kernel_width,
  uint32_t stride_height,
  uint32_t stride_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  uint32_t groups,
  size_t group_input_channels,
  size_t group_output_channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  int8_t input_zero_point,
  float input_scale,
  float kernel_scale,
  const int8_t* kernel,
  const int32_t* bias,
  int8_t output_zero_point,
  float output_scale,
  int8_t output_min,
  int8_t output_max,
  uint32_t flags,
  xnn_operator_t* deconvolution_op_out);

/// Setup a Deconvolution operator; adjustment_height/width extend the output size.
enum xnn_status xnn_setup_deconvolution2d_nhwc_qs8(
  xnn_operator_t deconvolution_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  uint32_t adjustment_height,
  uint32_t adjustment_width,
  const int8_t* input,
  int8_t* output,
  pthreadpool_t threadpool);

/// Create an ELU operator with slope parameter @a alpha for quantized signed 8-bit tensors in NC layout.
enum xnn_status xnn_create_elu_nc_qs8(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  float alpha,
  int8_t input_zero_point,
  float input_scale,
  int8_t output_zero_point,
  float output_scale,
  int8_t output_min,
  int8_t output_max,
  uint32_t flags,
  xnn_operator_t* elu_op_out);

enum xnn_status xnn_setup_elu_nc_qs8(
  xnn_operator_t elu_op,
  size_t batch_size,
  const int8_t* input,
  int8_t* output,
  pthreadpool_t threadpool);

/// Create a Fully Connected operator for quantized signed 8-bit tensors in NC layout.
enum xnn_status xnn_create_fully_connected_nc_qs8(
  size_t input_channels,
  size_t output_channels,
  size_t input_stride,
  size_t output_stride,
  int8_t input_zero_point,
  float input_scale,
  float kernel_scale,
  const int8_t* kernel,
  const int32_t* bias,
  int8_t output_zero_point,
  float output_scale,
  int8_t output_min,
  int8_t output_max,
  uint32_t flags,
  xnn_operator_t* fully_connected_op_out);

enum xnn_status xnn_setup_fully_connected_nc_qs8(
  xnn_operator_t fully_connected_op,
  size_t batch_size,
  const int8_t* input,
  int8_t* output,
  pthreadpool_t threadpool);

/// Create a Global Average Pooling operator for quantized signed 8-bit tensors in NWC layout.
enum xnn_status xnn_create_global_average_pooling_nwc_qs8(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  int8_t input_zero_point,
  float input_scale,
  int8_t
  output_zero_point,
  float output_scale,
  int8_t output_min,
  int8_t output_max,
  uint32_t flags,
  xnn_operator_t* global_average_pooling_op_out);

enum xnn_status xnn_setup_global_average_pooling_nwc_qs8(
  xnn_operator_t global_average_pooling_op,
  size_t batch_size,
  size_t width,
  const int8_t* input,
  int8_t* output,
  pthreadpool_t threadpool);

/// Create an N-dimensional Multiply operator for quantized signed 8-bit tensors; shapes are supplied at setup time.
enum xnn_status xnn_create_multiply_nd_qs8(
  int8_t input1_zero_point,
  float input1_scale,
  int8_t input2_zero_point,
  float input2_scale,
  int8_t output_zero_point,
  float output_scale,
  int8_t output_min,
  int8_t output_max,
  uint32_t flags,
  xnn_operator_t* multiply_op_out);

enum xnn_status xnn_setup_multiply_nd_qs8(
  xnn_operator_t multiply_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const int8_t* input1,
  const int8_t* input2,
  int8_t* output,
  pthreadpool_t threadpool);

/// Create a Sigmoid operator for quantized signed 8-bit tensors in NC layout.
enum xnn_status xnn_create_sigmoid_nc_qs8(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  int8_t input_zero_point,
  float input_scale,
  int8_t output_zero_point,
  float output_scale,
  int8_t output_min,
  int8_t output_max,
  uint32_t flags,
  xnn_operator_t* sigmoid_op_out);

enum xnn_status xnn_setup_sigmoid_nc_qs8(
  xnn_operator_t sigmoid_op,
  size_t batch_size,
  const int8_t* input,
  int8_t* output,
  pthreadpool_t threadpool);

/// Create an N-dimensional Subtract operator for quantized signed 8-bit tensors; shapes are supplied at setup time.
enum xnn_status xnn_create_subtract_nd_qs8(
  int8_t input1_zero_point,
  float input1_scale,
  int8_t input2_zero_point,
  float input2_scale,
  int8_t output_zero_point,
  float output_scale,
  int8_t output_min,
  int8_t output_max,
  uint32_t flags,
  xnn_operator_t* subtract_op_out);

enum xnn_status xnn_setup_subtract_nd_qs8(
  xnn_operator_t subtract_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const int8_t* input1,
  const int8_t* input2,
  int8_t* output,
  pthreadpool_t threadpool);

/// Create a TanH operator for quantized signed 8-bit tensors in NC layout.
enum xnn_status xnn_create_tanh_nc_qs8(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  int8_t input_zero_point,
  float input_scale,
  int8_t output_zero_point,
  float output_scale,
  int8_t output_min,
  int8_t output_max,
  uint32_t flags,
  xnn_operator_t* tanh_op_out);

enum xnn_status xnn_setup_tanh_nc_qs8(
  xnn_operator_t tanh_op,
  size_t batch_size,
  const int8_t* input,
  int8_t* output,
  pthreadpool_t threadpool);

#endif  // XNN_NO_QS8_OPERATORS

#ifndef XNN_NO_QU8_OPERATORS

/// Create an N-dimensional Add operator for quantized unsigned 8-bit tensors; shapes are supplied at setup time.
enum xnn_status xnn_create_add_nd_qu8(
  uint8_t input1_zero_point,
  float input1_scale,
  uint8_t input2_zero_point,
  float input2_scale,
  uint8_t output_zero_point,
  float output_scale,
  uint8_t output_min,
  uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* add_op_out);

enum xnn_status xnn_setup_add_nd_qu8(
  xnn_operator_t add_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const uint8_t* input1,
  const uint8_t* input2,
  uint8_t* output,
  pthreadpool_t threadpool);

/// Create a 2D Average Pooling operator for quantized unsigned 8-bit tensors in NHWC layout.
enum xnn_status xnn_create_average_pooling2d_nhwc_qu8(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t pooling_height,
  uint32_t pooling_width,
  uint32_t stride_height,
  uint32_t stride_width,
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  uint8_t input_zero_point,
  float input_scale,
  uint8_t output_zero_point,
  float output_scale,
  uint8_t output_min,
  uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* average_pooling_op_out);

enum xnn_status xnn_setup_average_pooling2d_nhwc_qu8(
  xnn_operator_t average_pooling_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);

/// Create a 2D Convolution operator for quantized unsigned 8-bit tensors in NHWC layout
/// (asymmetric kernel quantization via @a kernel_zero_point).
enum xnn_status xnn_create_convolution2d_nhwc_qu8(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t kernel_height,
  uint32_t kernel_width,
  uint32_t subsampling_height,
  uint32_t subsampling_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  uint32_t groups,
  size_t group_input_channels,
  size_t group_output_channels,
  size_t input_channel_stride,
  size_t output_channel_stride,
  uint8_t input_zero_point,
  float input_scale,
  uint8_t kernel_zero_point,
  float kernel_scale,
  const uint8_t* kernel,
  const int32_t* bias,
  uint8_t output_zero_point,
  float output_scale,
  uint8_t output_min,
  uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* convolution_op_out);

enum xnn_status xnn_setup_convolution2d_nhwc_qu8(
  xnn_operator_t convolution_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);

/// Create a 2D Deconvolution (transposed convolution) operator for quantized unsigned 8-bit tensors in NHWC layout.
enum xnn_status xnn_create_deconvolution2d_nhwc_qu8(
  uint32_t output_padding_top,
  uint32_t output_padding_right,
  uint32_t output_padding_bottom,
  uint32_t output_padding_left,
  uint32_t kernel_height,
  uint32_t kernel_width,
  uint32_t stride_height,
  uint32_t stride_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  uint32_t groups,
  size_t group_input_channels,
  size_t group_output_channels,
  size_t input_pixel_stride,
2498 size_t output_pixel_stride, 2499 uint8_t input_zero_point, 2500 float input_scale, 2501 uint8_t kernel_zero_point, 2502 float kernel_scale, 2503 const uint8_t* kernel, 2504 const int32_t* bias, 2505 uint8_t output_zero_point, 2506 float output_scale, 2507 uint8_t output_min, 2508 uint8_t output_max, 2509 uint32_t flags, 2510 xnn_operator_t* deconvolution_op_out); 2511 2512 enum xnn_status xnn_setup_deconvolution2d_nhwc_qu8( 2513 xnn_operator_t deconvolution_op, 2514 size_t batch_size, 2515 size_t input_height, 2516 size_t input_width, 2517 uint32_t adjustment_height, 2518 uint32_t adjustment_width, 2519 const uint8_t* input, 2520 uint8_t* output, 2521 pthreadpool_t threadpool); 2522 2523 enum xnn_status xnn_create_fully_connected_nc_qu8( 2524 size_t input_channels, 2525 size_t output_channels, 2526 size_t input_stride, 2527 size_t output_stride, 2528 uint8_t input_zero_point, 2529 float input_scale, 2530 uint8_t kernel_zero_point, 2531 float kernel_scale, 2532 const uint8_t* kernel, 2533 const int32_t* bias, 2534 uint8_t output_zero_point, 2535 float output_scale, 2536 uint8_t output_min, 2537 uint8_t output_max, 2538 uint32_t flags, 2539 xnn_operator_t* fully_connected_op_out); 2540 2541 enum xnn_status xnn_setup_fully_connected_nc_qu8( 2542 xnn_operator_t fully_connected_op, 2543 size_t batch_size, 2544 const uint8_t* input, 2545 uint8_t* output, 2546 pthreadpool_t threadpool); 2547 2548 enum xnn_status xnn_create_global_average_pooling_nwc_qu8( 2549 size_t channels, 2550 size_t input_stride, 2551 size_t output_stride, 2552 uint8_t input_zero_point, 2553 float input_scale, 2554 uint8_t output_zero_point, 2555 float output_scale, 2556 uint8_t output_min, 2557 uint8_t output_max, 2558 uint32_t flags, 2559 xnn_operator_t* global_average_pooling_op_out); 2560 2561 enum xnn_status xnn_setup_global_average_pooling_nwc_qu8( 2562 xnn_operator_t global_average_pooling_op, 2563 size_t batch_size, 2564 size_t width, 2565 const uint8_t* input, 2566 uint8_t* output, 2567 
pthreadpool_t threadpool); 2568 2569 enum xnn_status xnn_create_leaky_relu_nc_qu8( 2570 size_t channels, 2571 size_t input_stride, 2572 size_t output_stride, 2573 float negative_slope, 2574 uint8_t input_zero_point, 2575 float input_scale, 2576 uint8_t output_zero_point, 2577 float output_scale, 2578 uint8_t output_min, 2579 uint8_t output_max, 2580 uint32_t flags, 2581 xnn_operator_t* leaky_relu_op_out); 2582 2583 enum xnn_status xnn_setup_leaky_relu_nc_qu8( 2584 xnn_operator_t leaky_relu_op, 2585 size_t batch_size, 2586 const uint8_t* input, 2587 uint8_t* output, 2588 pthreadpool_t threadpool); 2589 2590 enum xnn_status xnn_create_multiply_nd_qu8( 2591 uint8_t input1_zero_point, 2592 float input1_scale, 2593 uint8_t input2_zero_point, 2594 float input2_scale, 2595 uint8_t output_zero_point, 2596 float output_scale, 2597 uint8_t output_min, 2598 uint8_t output_max, 2599 uint32_t flags, 2600 xnn_operator_t* multiply_op_out); 2601 2602 enum xnn_status xnn_setup_multiply_nd_qu8( 2603 xnn_operator_t multiply_op, 2604 size_t num_input1_dims, 2605 const size_t* input1_shape, 2606 size_t num_input2_dims, 2607 const size_t* input2_shape, 2608 const uint8_t* input1, 2609 const uint8_t* input2, 2610 uint8_t* output, 2611 pthreadpool_t threadpool); 2612 2613 enum xnn_status xnn_create_sigmoid_nc_qu8( 2614 size_t channels, 2615 size_t input_stride, 2616 size_t output_stride, 2617 uint8_t input_zero_point, 2618 float input_scale, 2619 uint8_t output_zero_point, 2620 float output_scale, 2621 uint8_t output_min, 2622 uint8_t output_max, 2623 uint32_t flags, 2624 xnn_operator_t* sigmoid_op_out); 2625 2626 enum xnn_status xnn_setup_sigmoid_nc_qu8( 2627 xnn_operator_t sigmoid_op, 2628 size_t batch_size, 2629 const uint8_t* input, 2630 uint8_t* output, 2631 pthreadpool_t threadpool); 2632 2633 enum xnn_status xnn_create_softmax_nc_qu8( 2634 size_t channels, 2635 size_t input_stride, 2636 size_t output_stride, 2637 float input_scale, 2638 uint8_t output_zero_point, 2639 float 
output_scale, 2640 uint32_t flags, 2641 xnn_operator_t* softmax_op_out); 2642 2643 enum xnn_status xnn_setup_softmax_nc_qu8( 2644 xnn_operator_t softmax_op, 2645 size_t batch_size, 2646 const uint8_t* input, 2647 uint8_t* output, 2648 pthreadpool_t threadpool); 2649 2650 enum xnn_status xnn_create_subtract_nd_qu8( 2651 uint8_t input1_zero_point, 2652 float input1_scale, 2653 uint8_t input2_zero_point, 2654 float input2_scale, 2655 uint8_t output_zero_point, 2656 float output_scale, 2657 uint8_t output_min, 2658 uint8_t output_max, 2659 uint32_t flags, 2660 xnn_operator_t* subtract_op_out); 2661 2662 enum xnn_status xnn_setup_subtract_nd_qu8( 2663 xnn_operator_t subtract_op, 2664 size_t num_input1_dims, 2665 const size_t* input1_shape, 2666 size_t num_input2_dims, 2667 const size_t* input2_shape, 2668 const uint8_t* input1, 2669 const uint8_t* input2, 2670 uint8_t* output, 2671 pthreadpool_t threadpool); 2672 2673 enum xnn_status xnn_create_tanh_nc_qu8( 2674 size_t channels, 2675 size_t input_stride, 2676 size_t output_stride, 2677 uint8_t input_zero_point, 2678 float input_scale, 2679 uint8_t output_zero_point, 2680 float output_scale, 2681 uint8_t output_min, 2682 uint8_t output_max, 2683 uint32_t flags, 2684 xnn_operator_t* tanh_op_out); 2685 2686 enum xnn_status xnn_setup_tanh_nc_qu8( 2687 xnn_operator_t tanh_op, 2688 size_t batch_size, 2689 const uint8_t* input, 2690 uint8_t* output, 2691 pthreadpool_t threadpool); 2692 2693 #endif // XNN_NO_QU8_OPERATORS 2694 2695 #ifndef XNN_NO_S8_OPERATORS 2696 2697 enum xnn_status xnn_create_clamp_nc_s8( 2698 size_t channels, 2699 size_t input_stride, 2700 size_t output_stride, 2701 int8_t output_min, 2702 int8_t output_max, 2703 uint32_t flags, 2704 xnn_operator_t* clamp_op_out); 2705 2706 enum xnn_status xnn_setup_clamp_nc_s8( 2707 xnn_operator_t clamp_op, 2708 size_t batch_size, 2709 const int8_t* input, 2710 int8_t* output, 2711 pthreadpool_t threadpool); 2712 2713 enum xnn_status xnn_create_max_pooling2d_nhwc_s8( 2714 
uint32_t input_padding_top, 2715 uint32_t input_padding_right, 2716 uint32_t input_padding_bottom, 2717 uint32_t input_padding_left, 2718 uint32_t pooling_height, 2719 uint32_t pooling_width, 2720 uint32_t stride_height, 2721 uint32_t stride_width, 2722 uint32_t dilation_height, 2723 uint32_t dilation_width, 2724 size_t channels, 2725 size_t input_pixel_stride, 2726 size_t output_pixel_stride, 2727 int8_t output_min, 2728 int8_t output_max, 2729 uint32_t flags, 2730 xnn_operator_t* max_pooling_op_out); 2731 2732 enum xnn_status xnn_setup_max_pooling2d_nhwc_s8( 2733 xnn_operator_t max_pooling_op, 2734 size_t batch_size, 2735 size_t input_height, 2736 size_t input_width, 2737 const int8_t* input, 2738 int8_t* output, 2739 pthreadpool_t threadpool); 2740 2741 enum xnn_status xnn_create_resize_bilinear2d_nhwc_s8( 2742 size_t channels, 2743 size_t input_pixel_stride, 2744 size_t output_pixel_stride, 2745 uint32_t flags, 2746 xnn_operator_t* resize_op_out); 2747 2748 enum xnn_status xnn_setup_resize_bilinear2d_nhwc_s8( 2749 xnn_operator_t resize_op, 2750 size_t batch_size, 2751 size_t input_height, 2752 size_t input_width, 2753 size_t output_height, 2754 size_t output_width, 2755 const int8_t* input, 2756 int8_t* output, 2757 pthreadpool_t threadpool); 2758 2759 #endif // XNN_NO_S8_OPERATORS 2760 2761 #ifndef XNN_NO_U8_OPERATORS 2762 2763 enum xnn_status xnn_create_clamp_nc_u8( 2764 size_t channels, 2765 size_t input_stride, 2766 size_t output_stride, 2767 uint8_t output_min, 2768 uint8_t output_max, 2769 uint32_t flags, 2770 xnn_operator_t* clamp_op_out); 2771 2772 enum xnn_status xnn_setup_clamp_nc_u8( 2773 xnn_operator_t clamp_op, 2774 size_t batch_size, 2775 const uint8_t* input, 2776 uint8_t* output, 2777 pthreadpool_t threadpool); 2778 2779 enum xnn_status xnn_create_max_pooling2d_nhwc_u8( 2780 uint32_t input_padding_top, 2781 uint32_t input_padding_right, 2782 uint32_t input_padding_bottom, 2783 uint32_t input_padding_left, 2784 uint32_t pooling_height, 2785 
uint32_t pooling_width, 2786 uint32_t stride_height, 2787 uint32_t stride_width, 2788 uint32_t dilation_height, 2789 uint32_t dilation_width, 2790 size_t channels, 2791 size_t input_pixel_stride, 2792 size_t output_pixel_stride, 2793 uint8_t output_min, 2794 uint8_t output_max, 2795 uint32_t flags, 2796 xnn_operator_t* max_pooling_op_out); 2797 2798 enum xnn_status xnn_setup_max_pooling2d_nhwc_u8( 2799 xnn_operator_t max_pooling_op, 2800 size_t batch_size, 2801 size_t input_height, 2802 size_t input_width, 2803 const uint8_t* input, 2804 uint8_t* output, 2805 pthreadpool_t threadpool); 2806 2807 enum xnn_status xnn_create_resize_bilinear2d_nhwc_u8( 2808 size_t channels, 2809 size_t input_pixel_stride, 2810 size_t output_pixel_stride, 2811 uint32_t flags, 2812 xnn_operator_t* resize_op_out); 2813 2814 enum xnn_status xnn_setup_resize_bilinear2d_nhwc_u8( 2815 xnn_operator_t resize_op, 2816 size_t batch_size, 2817 size_t input_height, 2818 size_t input_width, 2819 size_t output_height, 2820 size_t output_width, 2821 const uint8_t* input, 2822 uint8_t* output, 2823 pthreadpool_t threadpool); 2824 2825 #endif // XNN_NO_U8_OPERATORS 2826 2827 #ifndef XNN_NO_X8_OPERATORS 2828 2829 enum xnn_status xnn_create_copy_nc_x8( 2830 size_t channels, 2831 size_t input_stride, 2832 size_t output_stride, 2833 uint32_t flags, 2834 xnn_operator_t* copy_op_out); 2835 2836 enum xnn_status xnn_setup_copy_nc_x8( 2837 xnn_operator_t copy_op, 2838 size_t batch_size, 2839 const void* input, 2840 void* output, 2841 pthreadpool_t threadpool); 2842 2843 enum xnn_status xnn_create_channel_shuffle_nc_x8( 2844 size_t groups, 2845 size_t group_channels, 2846 size_t input_stride, 2847 size_t output_stride, 2848 uint32_t flags, 2849 xnn_operator_t* channel_shuffle_op_out); 2850 2851 enum xnn_status xnn_setup_channel_shuffle_nc_x8( 2852 xnn_operator_t channel_shuffle_op, 2853 size_t batch_size, 2854 const void* input, 2855 void* output, 2856 pthreadpool_t threadpool); 2857 2858 enum xnn_status 
xnn_create_constant_pad_nd_x8( 2859 const void* padding_value, 2860 uint32_t flags, 2861 xnn_operator_t* constant_pad_op_out); 2862 2863 enum xnn_status xnn_setup_constant_pad_nd_x8( 2864 xnn_operator_t constant_pad_op, 2865 size_t num_dims, 2866 const size_t* input_shape, 2867 const size_t* pre_padding, 2868 const size_t* post_padding, 2869 const void* input, 2870 void* output, 2871 pthreadpool_t threadpool); 2872 2873 #endif // XNN_NO_X8_OPERATORS 2874 2875 #ifndef XNN_NO_CVT_OPERATORS 2876 2877 enum xnn_status xnn_create_convert_nc_f16_f32( 2878 size_t channels, 2879 size_t input_stride, 2880 size_t output_stride, 2881 uint32_t flags, 2882 xnn_operator_t* convert_op_out); 2883 2884 enum xnn_status xnn_setup_convert_nc_f16_f32( 2885 xnn_operator_t convert_op, 2886 size_t batch_size, 2887 const void* input, 2888 float* output, 2889 pthreadpool_t threadpool); 2890 2891 enum xnn_status xnn_create_convert_nc_f32_f16( 2892 size_t channels, 2893 size_t input_stride, 2894 size_t output_stride, 2895 uint32_t flags, 2896 xnn_operator_t* convert_op_out); 2897 2898 enum xnn_status xnn_setup_convert_nc_f32_f16( 2899 xnn_operator_t convert_op, 2900 size_t batch_size, 2901 const float* input, 2902 void* output, 2903 pthreadpool_t threadpool); 2904 2905 enum xnn_status xnn_create_convert_nc_f32_qs8( 2906 size_t channels, 2907 size_t input_stride, 2908 size_t output_stride, 2909 float output_scale, 2910 int8_t output_zero_point, 2911 int8_t output_min, 2912 int8_t output_max, 2913 uint32_t flags, 2914 xnn_operator_t* convert_op_out); 2915 2916 enum xnn_status xnn_setup_convert_nc_f32_qs8( 2917 xnn_operator_t convert_op, 2918 size_t batch_size, 2919 const float* input, 2920 int8_t* output, 2921 pthreadpool_t threadpool); 2922 2923 enum xnn_status xnn_create_convert_nc_f32_qu8( 2924 size_t channels, 2925 size_t input_stride, 2926 size_t output_stride, 2927 float output_scale, 2928 uint8_t output_zero_point, 2929 uint8_t output_min, 2930 uint8_t output_max, 2931 uint32_t flags, 
2932 xnn_operator_t* convert_op_out); 2933 2934 enum xnn_status xnn_setup_convert_nc_f32_qu8( 2935 xnn_operator_t convert_op, 2936 size_t batch_size, 2937 const float* input, 2938 uint8_t* output, 2939 pthreadpool_t threadpool); 2940 2941 enum xnn_status xnn_create_convert_nc_qs8_f32( 2942 size_t channels, 2943 size_t input_stride, 2944 size_t output_stride, 2945 float input_scale, 2946 int8_t input_zero_point, 2947 uint32_t flags, 2948 xnn_operator_t* convert_op_out); 2949 2950 enum xnn_status xnn_setup_convert_nc_qs8_f32( 2951 xnn_operator_t convert_op, 2952 size_t batch_size, 2953 const int8_t* input, 2954 float* output, 2955 pthreadpool_t threadpool); 2956 2957 enum xnn_status xnn_create_convert_nc_qu8_f32( 2958 size_t channels, 2959 size_t input_stride, 2960 size_t output_stride, 2961 float input_scale, 2962 uint8_t input_zero_point, 2963 uint32_t flags, 2964 xnn_operator_t* convert_op_out); 2965 2966 enum xnn_status xnn_setup_convert_nc_qu8_f32( 2967 xnn_operator_t convert_op, 2968 size_t batch_size, 2969 const uint8_t* input, 2970 float* output, 2971 pthreadpool_t threadpool); 2972 2973 #endif // XNN_NO_CVT_OPERATORS 2974 2975 #ifdef __cplusplus 2976 } // extern "C" 2977 #endif 2978