// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#include <pthreadpool.h>

#ifdef __cplusplus
extern "C" {
#endif

/// The number of bytes XNNPACK may read beyond array bounds.
/// The caller must allocate at least this many extra bytes after the tensor data passed to XNNPACK.
///
/// Note: XNNPACK reads, but never writes beyond array bounds.
#define XNN_EXTRA_BYTES 16

/// Maximum number of dimensions in tensor shape.
#define XNN_MAX_TENSOR_DIMS 6

/// The convolution operator represents a depthwise convolution, and uses HWGo layout for filters.
#define XNN_FLAG_DEPTHWISE_CONVOLUTION 0x00000001

/// Assume transposed weights in a fully connected operator.
#define XNN_FLAG_TRANSPOSE_WEIGHTS 0x00000001

/// The operator assumes NHWC layout for the input, regardless of the output layout.
#define XNN_FLAG_INPUT_NHWC 0x00000002

/// Match "SAME" padding in TensorFlow. Exact padding values are computed dynamically depending on input size.
#define XNN_FLAG_TENSORFLOW_SAME_PADDING 0x00000004

/// Match behaviour of TensorFlow 1.x.
#define XNN_FLAG_TENSORFLOW_LEGACY_MODE 0x00000004

/// Align corners of input and output images in resize operations.
#define XNN_FLAG_ALIGN_CORNERS 0x00000008

/// Status code for any XNNPACK function call.
enum xnn_status {
  /// The call succeeded, and all output arguments now contain valid data.
  xnn_status_success = 0,
  /// XNNPACK is not initialized; xnn_initialize must be called (successfully) first.
  xnn_status_uninitialized = 1,
  /// One of the arguments passed to the call was invalid.
  xnn_status_invalid_parameter = 2,
  /// The object passed to the call was in a state that does not permit the requested operation.
  xnn_status_invalid_state = 3,
  /// The arguments were valid, but the requested configuration is not supported by this implementation.
  xnn_status_unsupported_parameter = 4,
  /// The host hardware lacks features required for the requested operation.
  xnn_status_unsupported_hardware = 5,
  /// The call failed because a memory allocation did not succeed.
  xnn_status_out_of_memory = 6,
};

/// User-provided memory management callbacks that XNNPACK uses in place of the system allocator.
struct xnn_allocator {
  /// User-specified pointer that will be passed as-is to all functions in this structure.
  void* context;
  /// Pointer to a function to be called for general memory allocation.
  ///
  /// @param context - The user-specified pointer from xnn_allocator structure.
  /// @param size - The size of the memory block to allocate, in bytes.
  ///
  /// @returns Pointer to the allocated memory block of at least @ref size bytes.
  ///          If allocation fails, the function must return NULL.
  void* (*allocate)(void* context, size_t size);
  /// Pointer to a function to be called for general memory re-allocation, i.e. to increase or shrink a previously
  /// allocated memory block. The content of the old memory block is copied to the new memory block.
  ///
  /// @param context - The user-specified pointer from xnn_allocator structure.
  /// @param pointer - Pointer to a memory block allocated by @ref allocate or @ref reallocate functions. Can be NULL.
  ///                  If the pointer is NULL, the @ref reallocate call is equivalent to an @ref allocate call.
  /// @param size - The new size of the memory block to allocate, in bytes.
  ///
  /// @returns Pointer to the newly allocated memory block of at least @ref size bytes with the content of the previous
  ///          memory block.
  ///          If allocation fails, the function must return NULL, but must not release the previous memory block.
  void* (*reallocate)(void* context, void* pointer, size_t size);
  /// Pointer to a function to be called for general memory de-allocation.
  ///
  /// @param context - The user-specified pointer from xnn_allocator structure.
  /// @param pointer - Pointer to a memory block allocated by @ref allocate or @ref reallocate functions. Can be NULL.
  ///                  If the pointer is NULL, the @ref deallocate call is a no-op.
  void (*deallocate)(void* context, void* pointer);
  /// Pointer to a function to be called for aligned memory allocation.
  ///
  /// @param context - The user-specified pointer from xnn_allocator structure.
  /// @param alignment - The alignment of the memory block to allocate, in bytes. Alignment is always a power-of-2.
  /// @param size - The size of the memory block to allocate, in bytes.
  ///
  /// @returns Pointer to the allocated memory block of at least @ref size bytes.
  ///          If allocation fails, the function must return NULL.
  void* (*aligned_allocate)(void* context, size_t alignment, size_t size);
  /// Pointer to a function to be called for aligned memory de-allocation.
  ///
  /// @param context - The user-specified pointer from xnn_allocator structure.
  /// @param pointer - Pointer to a memory block allocated by @ref aligned_allocate function. Can be NULL.
  ///                  If the pointer is NULL, the @ref aligned_deallocate call is a no-op.
  void (*aligned_deallocate)(void* context, void* pointer);
};

/// Initialize XNNPACK library.
///
/// XNNPACK must be successfully initialized before use.
/// During initialization, XNNPACK populates internal structures depending on host processor. It can be time-consuming.
///
/// @param[in] allocator - structure with function pointers to be used for memory allocation and de-allocation.
///                        If this argument is NULL, system-provided memory management functions (e.g. malloc/free)
///                        will be used.
///
/// @retval xnn_status_success - XNNPACK is successfully initialized and ready to use.
/// @retval xnn_status_out_of_memory - initialization failed due to out-of-memory condition.
/// @retval xnn_status_unsupported_hardware - initialization failed because the host processor does not satisfy the
///                                           minimum hardware requirements for XNNPACK. E.g. this may happen on x86
///                                           processors without SSE2 extension, or on 32-bit ARM processors without
///                                           the NEON SIMD extension.
enum xnn_status xnn_initialize(const struct xnn_allocator* allocator);

/// Deinitialize XNNPACK library.
///
/// To avoid memory and resource leaks, users must call xnn_deinitialize once for each successful xnn_initialize call.
///
/// @retval xnn_status_success - deinitialization call succeeded.
enum xnn_status xnn_deinitialize(void);

/// Subgraph is an abstract representation of a neural network model.
/// Subgraph objects are used to define Values (tensors) and Nodes (operators) comprising the model.
typedef struct xnn_subgraph* xnn_subgraph_t;

/// Create an empty Subgraph object.
///
/// @param external_value_ids - number of Value IDs to reserve for communication with external graph representation.
///                             The Subgraph object would avoid creating internal Value IDs in the
///                             [0, external_value_ids-1] range.
/// @param flags - binary features of the subgraph. No supported flags are currently defined.
/// @param subgraph_out - pointer to the variable that will be initialized with a handle to the Subgraph object upon
///                       successful return.
enum xnn_status xnn_create_subgraph(
  uint32_t external_value_ids,
  uint32_t flags,
  xnn_subgraph_t* subgraph_out);

/// Destroy a Subgraph object, as well as Values, and Nodes associated with the subgraph.
///
/// @param subgraph - the Subgraph object to destroy.
enum xnn_status xnn_delete_subgraph(
  xnn_subgraph_t subgraph);

/// The Value is an external input of the Subgraph.
#define XNN_VALUE_FLAG_EXTERNAL_INPUT 0x00000001
/// The Value is an external output of the Subgraph.
#define XNN_VALUE_FLAG_EXTERNAL_OUTPUT 0x00000002

/// Sentinel Value ID. Passing it as external_id to xnn_define_tensor_value requests an internally created ID.
#define XNN_INVALID_VALUE_ID UINT32_MAX

/// Type of elements in a Value object.
enum xnn_datatype {
  /// Invalid data type. Valid Values never have this datatype.
  xnn_datatype_invalid = 0,
  /// IEEE754 single-precision floating-point.
  xnn_datatype_fp32 = 1,
  /// IEEE754 half-precision floating-point.
  xnn_datatype_fp16 = 2,
};

/// Define a tensor-type Value and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Value.
/// @param datatype - type of the tensor elements.
/// @param num_dims - number of dimensions in the shape.
/// @param dims - pointer to an array of @a num_dims shape dimensions. If num_dims is 0, this pointer can be NULL.
///               XNNPACK does not keep any pointers to this array after the function returns.
/// @param data - pointer to static data used for tensor initialization. If the tensor is not statically initialized,
///               this pointer must be NULL. If non-NULL, the life-time of the static data must exceed the life-time
///               of the Subgraph object, and of any Runtime objects created from the Subgraph.
/// @param external_id - external ID for the Value. The ID must be within the range of reserved Value IDs specified on
///                      the Subgraph creation. If the external ID is XNN_INVALID_VALUE_ID, an internal ID will be
///                      created for the Value.
/// @param flags - binary features of the Value. Supported values are any combination of XNN_VALUE_FLAG_EXTERNAL_INPUT
///                and XNN_VALUE_FLAG_EXTERNAL_OUTPUT.
/// @param id_out - pointer to the variable that will be initialized with the Value ID upon successful return. If a
///                 valid @a external_id was provided, the variable will be initialized with the @a external_id value.
enum xnn_status xnn_define_tensor_value(
  xnn_subgraph_t subgraph,
  enum xnn_datatype datatype,
  size_t num_dims,
  const size_t* dims,
  const void* data,
  uint32_t external_id,
  uint32_t flags,
  uint32_t* id_out);

/// Define a 2D Convolution Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param input_padding_top - implicit zero-padding above 2D input data.
/// @param input_padding_right - implicit zero-padding to the right of 2D input data.
/// @param input_padding_bottom - implicit zero-padding below 2D input data.
/// @param input_padding_left - implicit zero-padding to the left of 2D input data.
/// @param kernel_height - kernel (filter) height.
/// @param kernel_width - kernel (filter) width.
/// @param subsampling_height - height of subsampling region for convolution output (convolution height stride).
/// @param subsampling_width - width of subsampling region for convolution output (convolution width stride).
/// @param dilation_height - dilation of kernel elements along the height dimension.
/// @param dilation_width - dilation of kernel elements along the width dimension.
/// @param groups - number of convolution groups.
/// @param group_input_channels - number of input channels per group.
/// @param group_output_channels - number of output channels per group.
/// @param output_min - lower bound for clipping output values.
/// @param output_max - upper bound for clipping output values.
/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
///                   with [N, IH, IW, groups * group_input_channels] dimensions.
/// @param filter_id - Value ID for the filter tensor. The filter tensor must be a 4D tensor defined in the @a subgraph
///                    with [groups * group_output_channels, kernel_height, kernel_width, group_input_channels]
///                    dimensions.
/// @param bias_id - Value ID for the bias tensor. The bias tensor must be a 1D tensor defined in the @a subgraph with
///                  [groups * group_output_channels] dimensions.
/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
///                    with [N, OH, OW, groups * group_output_channels] dimensions.
/// @param flags - binary features of the 2D Convolution Node. The only currently supported value is
///                XNN_FLAG_TENSORFLOW_SAME_PADDING.
enum xnn_status xnn_define_convolution_2d(
  xnn_subgraph_t subgraph,
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t kernel_height,
  uint32_t kernel_width,
  uint32_t subsampling_height,
  uint32_t subsampling_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  uint32_t groups,
  size_t group_input_channels,
  size_t group_output_channels,
  float output_min,
  float output_max,
  uint32_t input_id,
  uint32_t filter_id,
  uint32_t bias_id,
  uint32_t output_id,
  uint32_t flags);

/// Define a 2D Depthwise Convolution Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param input_padding_top - implicit zero-padding above 2D input data.
/// @param input_padding_right - implicit zero-padding to the right of 2D input data.
/// @param input_padding_bottom - implicit zero-padding below 2D input data.
/// @param input_padding_left - implicit zero-padding to the left of 2D input data.
/// @param kernel_height - kernel (filter) height.
/// @param kernel_width - kernel (filter) width.
/// @param subsampling_height - height of subsampling region for convolution output (convolution height stride).
/// @param subsampling_width - width of subsampling region for convolution output (convolution width stride).
/// @param dilation_height - dilation of kernel elements along the height dimension.
/// @param dilation_width - dilation of kernel elements along the width dimension.
/// @param depth_multiplier - ratio of output channels to input channels.
/// @param input_channels - number of input channels.
/// @param output_min - lower bound for clipping output values.
/// @param output_max - upper bound for clipping output values.
/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
///                   with [N, IH, IW, input_channels] dimensions.
/// @param filter_id - Value ID for the filter tensor. The filter tensor must be a 4D tensor defined in the @a subgraph
///                    with [1, kernel_height, kernel_width, input_channels * depth_multiplier] dimensions.
/// @param bias_id - Value ID for the bias tensor. The bias tensor must be a 1D tensor defined in the @a subgraph with
///                  [input_channels * depth_multiplier] dimensions.
/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
///                    with [N, OH, OW, input_channels * depth_multiplier] dimensions.
/// @param flags - binary features of the 2D Depthwise Convolution Node. The only currently supported value is
///                XNN_FLAG_TENSORFLOW_SAME_PADDING.
enum xnn_status xnn_define_depthwise_convolution_2d(
  xnn_subgraph_t subgraph,
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t kernel_height,
  uint32_t kernel_width,
  uint32_t subsampling_height,
  uint32_t subsampling_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  uint32_t depth_multiplier,
  size_t input_channels,
  float output_min,
  float output_max,
  uint32_t input_id,
  uint32_t filter_id,
  uint32_t bias_id,
  uint32_t output_id,
  uint32_t flags);

/// Define a 2-Input Add Node and add it to a Subgraph.
///
/// The 2-Input Add Node computes elementwise addition of two tensor inputs with numpy broadcasting rules.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param output_min - lower bound for clipping output values.
/// @param output_max - upper bound for clipping output values.
/// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in
///                    the @a subgraph with each dimension either equal to the corresponding dimension of the second
///                    input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
///                    that dimension.
/// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in
///                    the @a subgraph with each dimension either equal to the corresponding dimension of the first
///                    input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
///                    that dimension.
/// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined
///                    in the @a subgraph with each dimension equal to the maximum between the corresponding dimension
///                    of the two inputs.
314 /// @param flags - binary features of the Add Node. No supported flags are currently defined. 315 enum xnn_status xnn_define_add2( 316 xnn_subgraph_t subgraph, 317 float output_min, 318 float output_max, 319 uint32_t input1_id, 320 uint32_t input2_id, 321 uint32_t output_id, 322 uint32_t flags); 323 324 /// Define a 2-Input Multiply Node and add it to a Subgraph. 325 /// 326 /// The 2-Input Multiply Node computes elementwise multiplication of two tensor inputs with numpy broadcasting rules. 327 /// 328 /// @param subgraph - a Subgraph object that will own the created Node. 329 /// @param output_min - lower bound for clipping output values. 330 /// @param output_max - upper bound for clipping output values. 331 /// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in 332 /// the @a subgraph with each dimension either equal to the corresponding dimension of the second 333 /// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along 334 /// that dimension. 335 /// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in 336 /// the @a subgraph with each dimension either equal to the corresponding dimension of the first 337 /// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along 338 /// that dimension. 339 /// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined 340 /// in the @a subgraph with each dimension equal to the maximum between the corresponding dimension 341 /// of the two inputs. 342 /// @param flags - binary features of the Multiply Node. No supported flags are currently defined. 
enum xnn_status xnn_define_multiply2(
  xnn_subgraph_t subgraph,
  float output_min,
  float output_max,
  uint32_t input1_id,
  uint32_t input2_id,
  uint32_t output_id,
  uint32_t flags);

/// Define a PReLU (Parametric ReLU) Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
///                   with [N, H, W, channels] dimensions.
/// @param slope_id - Value ID for the slope tensor. The slope tensor must be a 1D tensor defined in the @a subgraph
///                   with [channels] dimensions.
/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
///                    with [N, H, W, channels] dimensions.
/// @param flags - binary features of the PReLU Node. No supported flags are currently defined.
enum xnn_status xnn_define_prelu(
  xnn_subgraph_t subgraph,
  uint32_t input_id,
  uint32_t slope_id,
  uint32_t output_id,
  uint32_t flags);

/// Define a Clamp Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param output_min - lower bound for clipping output values.
/// @param output_max - upper bound for clipping output values.
/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
///                    shape must match the shape of the input tensor.
/// @param flags - binary features of the Clamp Node. No supported flags are currently defined.
378 enum xnn_status xnn_define_clamp( 379 xnn_subgraph_t subgraph, 380 float output_min, 381 float output_max, 382 uint32_t input_id, 383 uint32_t output_id, 384 uint32_t flags); 385 386 /// Define a HardSwish Node and add it to a Subgraph. 387 /// 388 /// @param subgraph - a Subgraph object that will own the created Node. 389 /// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph. 390 /// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its 391 /// shape must match the shape of the input tensor. 392 /// @param flags - binary features of the HardSwish Node. No supported flags are currently defined. 393 enum xnn_status xnn_define_hardswish( 394 xnn_subgraph_t subgraph, 395 uint32_t input_id, 396 uint32_t output_id, 397 uint32_t flags); 398 399 /// Define a Sigmoid Node and add it to a Subgraph. 400 /// 401 /// @param subgraph - a Subgraph object that will own the created Node. 402 /// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph. 403 /// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its 404 /// shape must match the shape of the input tensor. 405 /// @param flags - binary features of the Sigmoid Node. No supported flags are currently defined. 406 enum xnn_status xnn_define_sigmoid( 407 xnn_subgraph_t subgraph, 408 uint32_t input_id, 409 uint32_t output_id, 410 uint32_t flags); 411 412 /// Define a SoftMax Node and add it to a Subgraph. 413 /// 414 /// @param subgraph - a Subgraph object that will own the created Node. 415 /// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph, and have at 416 /// least one dimension. 417 /// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its 418 /// shape must match the shape of the input tensor. 
/// @param flags - binary features of the SoftMax Node. No supported flags are currently defined.
enum xnn_status xnn_define_softmax(
  xnn_subgraph_t subgraph,
  uint32_t input_id,
  uint32_t output_id,
  uint32_t flags);

/// Runtime is a combination of an execution plan for subgraph Nodes and a memory manager for subgraph Values.
typedef struct xnn_runtime* xnn_runtime_t;

/// Create an empty Runtime object from a subgraph.
///
/// @param subgraph - a Subgraph object with all Values and Nodes that would be handled by the runtime. No Values or
///                   Nodes can be added to the runtime once it is constructed.
/// @param threadpool - the thread pool to be used for parallelisation of computations in the runtime. If the thread
///                     pool is NULL, the computation would run on the caller thread without parallelization.
/// @param flags - binary features of the subgraph. No supported flags are currently defined.
/// @param runtime_out - pointer to the variable that will be initialized with a handle to the Runtime object upon
///                      successful return. Once constructed, the Runtime object is independent of the Subgraph object
///                      used to create it.
enum xnn_status xnn_create_runtime_v2(
  xnn_subgraph_t subgraph,
  pthreadpool_t threadpool,
  uint32_t flags,
  xnn_runtime_t* runtime_out);

/// Create a Runtime object from a subgraph (variant of @ref xnn_create_runtime_v2 without the threadpool and flags
/// parameters).
enum xnn_status xnn_create_runtime(
  xnn_subgraph_t subgraph,
  xnn_runtime_t* runtime_out);

/// Location of the data for one external Value (input or output) of a Runtime.
struct xnn_external_value {
  /// ID of the external Value.
  uint32_t id;
  /// Pointer to the tensor data for this Value.
  void* data;
};

/// Setup data pointers for external inputs and outputs in a Runtime object.
///
/// @param runtime - a Runtime object created with @ref xnn_create_runtime or @ref xnn_create_runtime_v2.
/// @param num_external_values - the number of external inputs and outputs specified in this call. This number must
///                              match the number of external inputs and outputs in the runtime, i.e.
all external 459 /// inputs and outputs in the runtime must be specified in one call. 460 /// @param external_values - array with location information for all external inputs and outputs in the runtime. 461 enum xnn_status xnn_setup_runtime( 462 xnn_runtime_t runtime, 463 size_t num_external_values, 464 const struct xnn_external_value* external_values); 465 466 /// Execute forward pass for all operators in the runtime. 467 /// 468 /// @param runtime - the Runtime object with the execution plan to invoke. 469 enum xnn_status xnn_invoke_runtime( 470 xnn_runtime_t runtime); 471 472 /// Destroy a Runtime object, as well as operators and memory associated with it. 473 /// 474 /// @param runtime - the Runtime object to destroy. 475 enum xnn_status xnn_delete_runtime( 476 xnn_runtime_t runtime); 477 478 typedef struct xnn_operator* xnn_operator_t; 479 480 enum xnn_status xnn_run_operator( 481 xnn_operator_t op, 482 pthreadpool_t threadpool); 483 484 enum xnn_status xnn_delete_operator( 485 xnn_operator_t op); 486 487 #ifndef XNN_NO_F32_OPERATORS 488 489 enum xnn_status xnn_create_add_nc_f32( 490 size_t channels, 491 size_t a_stride, 492 size_t b_stride, 493 size_t sum_stride, 494 float sum_min, 495 float sum_max, 496 uint32_t flags, 497 xnn_operator_t* add_op_out); 498 499 enum xnn_status xnn_setup_add_nc_f32( 500 xnn_operator_t add_op, 501 size_t batch_size, 502 const float* a, 503 const float* b, 504 float* sum, 505 pthreadpool_t threadpool); 506 507 enum xnn_status xnn_create_add_nd_f32( 508 float output_min, 509 float output_max, 510 uint32_t flags, 511 xnn_operator_t* add_op_out); 512 513 enum xnn_status xnn_setup_add_nd_f32( 514 xnn_operator_t add_op, 515 size_t num_input1_dims, 516 const size_t* input1_shape, 517 size_t num_input2_dims, 518 const size_t* input2_shape, 519 const float* input1, 520 const float* input2, 521 float* output, 522 pthreadpool_t threadpool); 523 524 enum xnn_status xnn_create_argmax_pooling2d_nhwc_f32( 525 uint32_t input_padding_top, 526 
uint32_t input_padding_right, 527 uint32_t input_padding_bottom, 528 uint32_t input_padding_left, 529 uint32_t pooling_height, 530 uint32_t pooling_width, 531 size_t channels, 532 size_t input_pixel_stride, 533 size_t output_pixel_stride, 534 float output_min, 535 float output_max, 536 uint32_t flags, 537 xnn_operator_t* argmax_pooling_op_out); 538 539 enum xnn_status xnn_setup_argmax_pooling2d_nhwc_f32( 540 xnn_operator_t argmax_pooling_op, 541 size_t batch_size, 542 size_t input_height, 543 size_t input_width, 544 const float* input, 545 float* output, 546 uint32_t* index, 547 pthreadpool_t threadpool); 548 549 enum xnn_status xnn_create_average_pooling2d_nhwc_f32( 550 uint32_t input_padding_top, 551 uint32_t input_padding_right, 552 uint32_t input_padding_bottom, 553 uint32_t input_padding_left, 554 uint32_t pooling_height, 555 uint32_t pooling_width, 556 uint32_t stride_height, 557 uint32_t stride_width, 558 size_t channels, 559 size_t input_pixel_stride, 560 size_t output_pixel_stride, 561 float output_min, 562 float output_max, 563 uint32_t flags, 564 xnn_operator_t* average_pooling_op_out); 565 566 enum xnn_status xnn_setup_average_pooling2d_nhwc_f32( 567 xnn_operator_t average_pooling_op, 568 size_t batch_size, 569 size_t input_height, 570 size_t input_width, 571 const float* input, 572 float* output, 573 pthreadpool_t threadpool); 574 575 enum xnn_status xnn_create_clamp_nc_f32( 576 size_t channels, 577 size_t input_stride, 578 size_t output_stride, 579 float output_min, 580 float output_max, 581 uint32_t flags, 582 xnn_operator_t* clamp_op_out); 583 584 enum xnn_status xnn_setup_clamp_nc_f32( 585 xnn_operator_t clamp_op, 586 size_t batch_size, 587 const float* input, 588 float* output, 589 pthreadpool_t threadpool); 590 591 enum xnn_status xnn_create_convolution2d_nhwc_f32( 592 uint32_t input_padding_top, 593 uint32_t input_padding_right, 594 uint32_t input_padding_bottom, 595 uint32_t input_padding_left, 596 uint32_t kernel_height, 597 uint32_t 
kernel_width,
  // NOTE(review): the parameters above/below continue the declaration of
  // xnn_create_convolution2d_nhwc_f32, whose opening lines are before this chunk.
  uint32_t subsampling_height,
  uint32_t subsampling_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  uint32_t groups,
  size_t group_input_channels,
  size_t group_output_channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  const float* kernel,
  const float* bias,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* convolution_op_out);

/// Set up an F32 NHWC 2D convolution operator for the given batch size, input
/// dimensions, and input/output tensors, before running it on the thread pool.
enum xnn_status xnn_setup_convolution2d_nhwc_f32(
  xnn_operator_t convolution_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create an F32 NHWC 2D deconvolution (transposed convolution) operator.
/// On success, a handle is written to @a deconvolution_op_out.
enum xnn_status xnn_create_deconvolution2d_nhwc_f32(
  uint32_t output_padding_top,
  uint32_t output_padding_right,
  uint32_t output_padding_bottom,
  uint32_t output_padding_left,
  uint32_t kernel_height,
  uint32_t kernel_width,
  uint32_t stride_height,
  uint32_t stride_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  uint32_t groups,
  size_t group_input_channels,
  size_t group_output_channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  const float* kernel,
  const float* bias,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* deconvolution_op_out);

/// Set up an F32 NHWC 2D deconvolution operator. The adjustment parameters
/// adjust the output dimensions (presumably to disambiguate output size for a
/// given stride — TODO confirm against XNNPACK docs).
enum xnn_status xnn_setup_deconvolution2d_nhwc_f32(
  xnn_operator_t deconvolution_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  uint32_t adjustment_height,
  uint32_t adjustment_width,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create an F32 N-dimensional element-wise division operator with output clamping.
enum xnn_status xnn_create_divide_nd_f32(
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* divide_op_out);

/// Set up an F32 N-dimensional division operator for two inputs with the given
/// shapes (shape arrays have num_input*_dims elements each).
enum xnn_status xnn_setup_divide_nd_f32(
  xnn_operator_t divide_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const float* input1,
  const float* input2,
  float* output,
  pthreadpool_t threadpool);

/// Create an F32 fully connected (NC layout) operator with output clamping.
enum xnn_status xnn_create_fully_connected_nc_f32(
  size_t input_channels,
  size_t output_channels,
  size_t input_stride,
  size_t output_stride,
  const float* kernel,
  const float* bias,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* fully_connected_op_out);

/// Set up an F32 fully connected operator for the given batch and tensors.
enum xnn_status xnn_setup_fully_connected_nc_f32(
  xnn_operator_t fully_connected_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create an F32 NWC global average pooling operator with output clamping.
enum xnn_status xnn_create_global_average_pooling_nwc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* global_average_pooling_op_out);

/// Set up an F32 NWC global average pooling operator.
enum xnn_status xnn_setup_global_average_pooling_nwc_f32(
  xnn_operator_t global_average_pooling_op,
  size_t batch_size,
  size_t width,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create an F32 HardSwish (NC layout) operator.
enum xnn_status xnn_create_hardswish_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* hardswish_op_out);

/// Set up an F32 HardSwish operator for the given batch and tensors.
enum xnn_status xnn_setup_hardswish_nc_f32(
  xnn_operator_t hardswish_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create an F32 NHWC 2D max pooling operator with output clamping.
enum xnn_status xnn_create_max_pooling2d_nhwc_f32(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t pooling_height,
  uint32_t pooling_width,
  uint32_t stride_height,
  uint32_t stride_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* max_pooling_op_out);

/// Set up an F32 NHWC 2D max pooling operator.
enum xnn_status xnn_setup_max_pooling2d_nhwc_f32(
  xnn_operator_t max_pooling_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create an F32 N-dimensional element-wise maximum operator.
enum xnn_status xnn_create_maximum_nd_f32(
  uint32_t flags,
  xnn_operator_t* maximum_op_out);

/// Set up an F32 N-dimensional maximum operator for two shaped inputs.
enum xnn_status xnn_setup_maximum_nd_f32(
  xnn_operator_t maximum_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const float* input1,
  const float* input2,
  float* output,
  pthreadpool_t threadpool);

/// Create an F32 N-dimensional element-wise minimum operator.
enum xnn_status xnn_create_minimum_nd_f32(
  uint32_t flags,
  xnn_operator_t* minimum_op_out);

/// Set up an F32 N-dimensional minimum operator for two shaped inputs.
enum xnn_status xnn_setup_minimum_nd_f32(
  xnn_operator_t minimum_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const float* input1,
  const float* input2,
  float* output,
  pthreadpool_t threadpool);

/// Create an F32 N-dimensional element-wise multiplication operator with output clamping.
enum xnn_status xnn_create_multiply_nd_f32(
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* multiply_op_out);

/// Set up an F32 N-dimensional multiplication operator for two shaped inputs.
enum xnn_status xnn_setup_multiply_nd_f32(
  xnn_operator_t multiply_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const float* input1,
  const float* input2,
  float* output,
  pthreadpool_t threadpool);

/// Create an F32 PReLU (NC layout) operator with per-channel negative slopes
/// and output clamping.
enum xnn_status xnn_create_prelu_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  const float* negative_slope,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* prelu_op_out);

/// Set up an F32 PReLU operator for the given batch and tensors.
enum xnn_status xnn_setup_prelu_nc_f32(
  xnn_operator_t prelu_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create an F32 NHWC 2D bilinear resize operator.
/// See XNN_FLAG_ALIGN_CORNERS / XNN_FLAG_TENSORFLOW_LEGACY_MODE for flag values.
enum xnn_status xnn_create_resize_bilinear2d_nhwc_f32(
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  uint32_t flags,
  xnn_operator_t* resize_op_out);

/// Set up an F32 NHWC bilinear resize operator for explicit input and output sizes.
enum xnn_status xnn_setup_resize_bilinear2d_nhwc_f32(
  xnn_operator_t resize_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  size_t output_height,
  size_t output_width,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create an F32 Sigmoid (NC layout) operator.
enum xnn_status xnn_create_sigmoid_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* sigmoid_op_out);

/// Set up an F32 Sigmoid operator for the given batch and tensors.
enum xnn_status xnn_setup_sigmoid_nc_f32(
  xnn_operator_t sigmoid_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create an F32 SoftMax (NC layout) operator.
enum xnn_status xnn_create_softmax_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* softmax_op_out);

/// Set up an F32 SoftMax operator for the given batch and tensors.
enum xnn_status xnn_setup_softmax_nc_f32(
  xnn_operator_t softmax_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create an F32 N-dimensional element-wise subtraction operator with output clamping.
enum xnn_status xnn_create_subtract_nd_f32(
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* subtract_op_out);

/// Set up an F32 N-dimensional subtraction operator for two shaped inputs.
enum xnn_status xnn_setup_subtract_nd_f32(
  xnn_operator_t subtract_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const float* input1,
  const float* input2,
  float* output,
  pthreadpool_t threadpool);

#ifndef XNN_NO_NCHW_OPERATORS

/// Create an F32 NCHW 2D convolution operator. Note: unlike the NHWC variant,
/// this one takes no per-pixel strides.
enum xnn_status xnn_create_convolution2d_nchw_f32(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t kernel_height,
  uint32_t kernel_width,
  uint32_t subsampling_height,
  uint32_t subsampling_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  uint32_t groups,
  size_t group_input_channels,
  size_t group_output_channels,
  const float* kernel,
  const float* bias,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* convolution_op_out);

/// Set up an F32 NCHW 2D convolution operator. Batch strides are per-batch
/// element strides for the input and output tensors.
enum xnn_status xnn_setup_convolution2d_nchw_f32(
  xnn_operator_t convolution_op,
  size_t batch_size,
  size_t input_batch_stride,
  size_t output_batch_stride,
  size_t input_height,
  size_t input_width,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create an F32 NCW global average pooling operator with output clamping.
enum xnn_status xnn_create_global_average_pooling_ncw_f32(
  size_t channels,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* global_average_pooling_op_out);

/// Set up an F32 NCW global average pooling operator.
enum xnn_status xnn_setup_global_average_pooling_ncw_f32(
  xnn_operator_t global_average_pooling_op,
  size_t batch_size,
  size_t width,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

#endif  // XNN_NO_NCHW_OPERATORS

#endif  // XNN_NO_F32_OPERATORS

#ifndef XNN_NO_X32_OPERATORS

/// Create a 32-bit (type-agnostic) channel-padding operator, padding channels
/// before and after with the value pointed to by @a pad_value.
enum xnn_status xnn_create_channel_pad_nc_x32(
  size_t input_channels,
  size_t pad_before_channels,
  size_t pad_after_channels,
  size_t input_stride,
  size_t output_stride,
  const void* pad_value,
  uint32_t flags,
  xnn_operator_t* channel_pad_op_out);

/// Set up a 32-bit channel-padding operator for the given batch and tensors.
enum xnn_status xnn_setup_channel_pad_nc_x32(
  xnn_operator_t channel_pad_op,
  size_t batch_size,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// Create a 32-bit channel-shuffle operator.
enum xnn_status xnn_create_channel_shuffle_nc_x32(
  size_t groups,
  size_t group_channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* channel_shuffle_op_out);

/// Set up a 32-bit channel-shuffle operator for the given batch and tensors.
enum xnn_status xnn_setup_channel_shuffle_nc_x32(
  xnn_operator_t channel_shuffle_op,
  size_t batch_size,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// Create a 32-bit NHWC 2D unpooling operator (inverse of max pooling).
enum xnn_status xnn_create_unpooling2d_nhwc_x32(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t pooling_height,
  uint32_t pooling_width,
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  uint32_t flags,
  xnn_operator_t* unpooling_op_out);

/// Set up a 32-bit NHWC 2D unpooling operator. @a index carries the element
/// indices (presumably produced by the corresponding pooling pass — TODO confirm).
enum xnn_status xnn_setup_unpooling2d_nhwc_x32(
  xnn_operator_t unpooling_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const void* input,
  const uint32_t* index,
  void* output,
  pthreadpool_t threadpool);

#endif  // XNN_NO_X32_OPERATORS

#ifndef XNN_NO_Q8_OPERATORS

/// Create a Q8 (quantized unsigned 8-bit) element-wise addition operator.
/// Zero points and scales describe the quantization of both inputs and the sum.
enum xnn_status xnn_create_add_nc_q8(
  size_t channels,
  size_t a_stride,
  size_t b_stride,
  size_t sum_stride,
  uint8_t a_zero_point,
  float a_scale,
  uint8_t b_zero_point,
  float b_scale,
  uint8_t sum_zero_point,
  float sum_scale,
  uint8_t sum_min,
  uint8_t sum_max,
  uint32_t flags,
  xnn_operator_t* add_op_out);

/// Set up a Q8 addition operator for the given batch and tensors.
enum xnn_status xnn_setup_add_nc_q8(
  xnn_operator_t add_op,
  size_t batch_size,
  const uint8_t* a,
  const uint8_t* b,
  uint8_t* sum,
  pthreadpool_t threadpool);

/// Create a Q8 NHWC 2D average pooling operator with requantization parameters
/// and output clamping.
enum xnn_status xnn_create_average_pooling2d_nhwc_q8(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t pooling_height,
  uint32_t pooling_width,
  uint32_t stride_height,
  uint32_t stride_width,
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  uint8_t input_zero_point,
  float input_scale,
  uint8_t output_zero_point,
  float output_scale,
  uint8_t output_min,
  uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* average_pooling_op_out);

/// Set up a Q8 NHWC 2D average pooling operator.
enum xnn_status xnn_setup_average_pooling2d_nhwc_q8(
  xnn_operator_t average_pooling_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);

/// Create a Q8 NHWC 2D convolution operator; bias is in 32-bit accumulator
/// precision (int32_t).
enum xnn_status xnn_create_convolution2d_nhwc_q8(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t kernel_height,
  uint32_t kernel_width,
  uint32_t subsampling_height,
  uint32_t subsampling_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  uint32_t groups,
  size_t group_input_channels,
  size_t group_output_channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  uint8_t input_zero_point,
  float input_scale,
  uint8_t kernel_zero_point,
  float kernel_scale,
  const uint8_t* kernel,
  const int32_t* bias,
  uint8_t output_zero_point,
  float output_scale,
  uint8_t output_min,
  uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* convolution_op_out);

/// Set up a Q8 NHWC 2D convolution operator.
enum xnn_status xnn_setup_convolution2d_nhwc_q8(
  xnn_operator_t convolution_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);

/// Create a Q8 NHWC 2D deconvolution (transposed convolution) operator;
/// bias is in 32-bit accumulator precision (int32_t).
enum xnn_status xnn_create_deconvolution2d_nhwc_q8(
  uint32_t output_padding_top,
  uint32_t output_padding_right,
  uint32_t output_padding_bottom,
  uint32_t output_padding_left,
  uint32_t kernel_height,
  uint32_t kernel_width,
  uint32_t stride_height,
  uint32_t stride_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  uint32_t groups,
  size_t group_input_channels,
  size_t group_output_channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  uint8_t input_zero_point,
  float input_scale,
  uint8_t kernel_zero_point,
  float kernel_scale,
  const uint8_t* kernel,
  const int32_t* bias,
  uint8_t output_zero_point,
  float output_scale,
  uint8_t output_min,
  uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* deconvolution_op_out);

/// Set up a Q8 NHWC 2D deconvolution operator (see the F32 variant for the
/// meaning of the adjustment parameters).
enum xnn_status xnn_setup_deconvolution2d_nhwc_q8(
  xnn_operator_t deconvolution_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  uint32_t adjustment_height,
  uint32_t adjustment_width,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);

/// Create a Q8 fully connected (NC layout) operator; bias is int32_t.
enum xnn_status xnn_create_fully_connected_nc_q8(
  size_t input_channels,
  size_t output_channels,
  size_t input_stride,
  size_t output_stride,
  uint8_t input_zero_point,
  float input_scale,
  uint8_t kernel_zero_point,
  float kernel_scale,
  const uint8_t* kernel,
  const int32_t* bias,
  uint8_t output_zero_point,
  float output_scale,
  uint8_t output_min,
  uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* fully_connected_op_out);

/// Set up a Q8 fully connected operator for the given batch and tensors.
enum xnn_status xnn_setup_fully_connected_nc_q8(
  xnn_operator_t fully_connected_op,
  size_t batch_size,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);

/// Create a Q8 NWC global average pooling operator with requantization parameters.
enum xnn_status xnn_create_global_average_pooling_nwc_q8(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint8_t input_zero_point,
  float input_scale,
  uint8_t output_zero_point,
  float output_scale,
  uint8_t output_min,
  uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* global_average_pooling_op_out);

/// Set up a Q8 NWC global average pooling operator.
enum xnn_status xnn_setup_global_average_pooling_nwc_q8(
  xnn_operator_t global_average_pooling_op,
  size_t batch_size,
  size_t width,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);

/// Create a Q8 Leaky ReLU (NC layout) operator with the given negative slope.
enum xnn_status xnn_create_leaky_relu_nc_q8(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  float negative_slope,
  uint8_t input_zero_point,
  float input_scale,
  uint8_t output_zero_point,
  float output_scale,
  uint8_t output_min,
  uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* leaky_relu_op_out);

/// Set up a Q8 Leaky ReLU operator for the given batch and tensors.
enum xnn_status xnn_setup_leaky_relu_nc_q8(
  xnn_operator_t leaky_relu_op,
  size_t batch_size,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);

/// Create a Q8 Sigmoid (NC layout) operator with requantization parameters.
enum xnn_status xnn_create_sigmoid_nc_q8(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint8_t input_zero_point,
  float input_scale,
  uint8_t output_zero_point,
  float output_scale,
  uint8_t output_min,
  uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* sigmoid_op_out);

/// Set up a Q8 Sigmoid operator for the given batch and tensors.
enum xnn_status xnn_setup_sigmoid_nc_q8(
  xnn_operator_t sigmoid_op,
  size_t batch_size,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);

/// Create a Q8 SoftMax (NC layout) operator. Note: no input zero point and no
/// output min/max — unlike the other Q8 operators above.
enum xnn_status xnn_create_softmax_nc_q8(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  float input_scale,
  uint8_t output_zero_point,
  float output_scale,
  uint32_t flags,
  xnn_operator_t* softmax_op_out);

/// Set up a Q8 SoftMax operator for the given batch and tensors.
enum xnn_status xnn_setup_softmax_nc_q8(
  xnn_operator_t softmax_op,
  size_t batch_size,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);

#endif  // XNN_NO_Q8_OPERATORS

#ifndef XNN_NO_U8_OPERATORS

/// Create a U8 Clamp (NC layout) operator that limits values to [output_min, output_max].
enum xnn_status xnn_create_clamp_nc_u8(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint8_t output_min,
  uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* clamp_op_out);

/// Set up a U8 Clamp operator for the given batch and tensors.
enum xnn_status xnn_setup_clamp_nc_u8(
  xnn_operator_t clamp_op,
  size_t batch_size,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);

/// Create a U8 NHWC 2D max pooling operator with output clamping.
enum xnn_status xnn_create_max_pooling2d_nhwc_u8(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t pooling_height,
  uint32_t pooling_width,
  uint32_t stride_height,
  uint32_t stride_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  uint8_t output_min,
  uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* max_pooling_op_out);

/// Set up a U8 NHWC 2D max pooling operator.
enum xnn_status xnn_setup_max_pooling2d_nhwc_u8(
  xnn_operator_t max_pooling_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);

#endif  // XNN_NO_U8_OPERATORS

#ifndef XNN_NO_X8_OPERATORS

/// Create an 8-bit (type-agnostic) channel-shuffle operator.
enum xnn_status xnn_create_channel_shuffle_nc_x8(
  size_t groups,
  size_t group_channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* channel_shuffle_op_out);

/// Set up an 8-bit channel-shuffle operator for the given batch and tensors.
enum xnn_status xnn_setup_channel_shuffle_nc_x8(
  xnn_operator_t channel_shuffle_op,
  size_t batch_size,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

#endif  // XNN_NO_X8_OPERATORS

#ifdef __cplusplus
}  // extern "C"
#endif