1 // Copyright (c) Facebook, Inc. and its affiliates. 2 // All rights reserved. 3 // 4 // Copyright 2019 Google LLC 5 // 6 // This source code is licensed under the BSD-style license found in the 7 // LICENSE file in the root directory of this source tree. 8 9 #pragma once 10 11 #include <stdbool.h> 12 #include <stddef.h> 13 #include <stdint.h> 14 15 #include <pthreadpool.h> 16 17 #ifdef __cplusplus 18 extern "C" { 19 #endif 20 21 /// The number of bytes XNNPACK may read beyond array bounds. 22 /// The caller must allocate at least this many extra bytes after the tensor data passed to XNNPACK. 23 /// 24 /// Note: XNNPACK reads, but never writes beyond array bounds. 25 #define XNN_EXTRA_BYTES 16 26 27 /// Maximum number of dimensions in tensor shape. 28 #define XNN_MAX_TENSOR_DIMS 6 29 30 /// Allow sparse inference in a Runtime. 31 /// 32 /// Note: this flag forces XNNPACK to consider sparse inference, but does not guarantee it. 33 #define XNN_FLAG_SPARSE_INFERENCE 0x00000001 34 35 /// The convolution operator represents a depthwise convolution, and use HWGo layout for filters. 36 #define XNN_FLAG_DEPTHWISE_CONVOLUTION 0x00000001 37 38 /// Assume transposed weights in a fully connected operator. 39 #define XNN_FLAG_TRANSPOSE_WEIGHTS 0x00000001 40 41 /// The operator assumes NHWC layout for the input, regardless of the output layout. 42 #define XNN_FLAG_INPUT_NHWC 0x00000002 43 44 /// Match "SAME" padding in TensorFlow. Exact padding values are computed dynamically depending on input size. 45 #define XNN_FLAG_TENSORFLOW_SAME_PADDING 0x00000004 46 47 /// Implicitly flatten and reshape input of a Fully Connected operator into a 2D 48 /// tensor. 49 #define XNN_FLAG_TENSORFLOW_RESHAPE_2D 0x00000004 50 51 /// Match behaviour of TensorFlow 1.x. 52 #define XNN_FLAG_TENSORFLOW_LEGACY_MODE 0x00000004 53 54 /// Align corners of input and output images in resize operations. 55 #define XNN_FLAG_ALIGN_CORNERS 0x00000008 56 57 /// Status code for any XNNPACK function call. 
enum xnn_status {
  /// The call succeeded, and all output arguments now contain valid data.
  xnn_status_success = 0,
  xnn_status_uninitialized = 1,
  xnn_status_invalid_parameter = 2,
  xnn_status_invalid_state = 3,
  xnn_status_unsupported_parameter = 4,
  xnn_status_unsupported_hardware = 5,
  xnn_status_out_of_memory = 6,
};

struct xnn_allocator {
  /// User-specified pointer that will be passed as-is to all functions in this structure.
  void* context;
  /// Pointer to a function to be called for general memory allocation.
  ///
  /// @param context - The user-specified pointer from xnn_allocator structure.
  /// @param size - The size of the memory block to allocate, in bytes.
  ///
  /// @returns Pointer to the allocated memory block of at least @ref size bytes.
  ///          If allocation fails, the function must return NULL.
  void* (*allocate)(void* context, size_t size);
  /// Pointer to a function to be called for general memory re-allocation, i.e. to increase or shrink a previously
  /// allocated memory block. The content of the old memory block is copied to the new memory block.
  ///
  /// @param context - The user-specified pointer from xnn_allocator structure.
  /// @param pointer - Pointer to a memory block allocated by @ref allocate or @ref reallocate functions. Can be NULL.
  ///                  If the pointer is NULL, the @ref reallocate call is equivalent to an @ref allocate call.
  /// @param size - The new size of the memory block to allocate, in bytes.
  ///
  /// @returns Pointer to the newly allocated memory block of at least @ref size bytes with the content of the previous
  ///          memory block.
  ///          If allocation fails, the function must return NULL, but must not release the previous memory block.
  void* (*reallocate)(void* context, void* pointer, size_t size);
  /// Pointer to a function to be called for general memory de-allocation.
  ///
  /// @param context - The user-specified pointer from xnn_allocator structure.
  /// @param pointer - Pointer to a memory block allocated by @ref allocate or @ref reallocate functions. Can be NULL.
  ///                  If the pointer is NULL, the @ref deallocate call is a no-op.
  void (*deallocate)(void* context, void* pointer);
  /// Pointer to a function to be called for aligned memory allocation.
  ///
  /// @param context - The user-specified pointer from xnn_allocator structure.
  /// @param alignment - The alignment of the memory block to allocate, in bytes. Alignment is always a power-of-2.
  /// @param size - The size of the memory block to allocate, in bytes.
  ///
  /// @returns Pointer to the allocated memory block of at least @ref size bytes.
  ///          If allocation fails, the function must return NULL.
  void* (*aligned_allocate)(void* context, size_t alignment, size_t size);
  /// Pointer to a function to be called for aligned memory de-allocation.
  ///
  /// @param context - The user-specified pointer from xnn_allocator structure.
  /// @param pointer - Pointer to a memory block allocated by @ref aligned_allocate function. Can be NULL.
  ///                  If the pointer is NULL, the @ref aligned_deallocate call is a no-op.
  void (*aligned_deallocate)(void* context, void* pointer);
};

/// Initialize XNNPACK library.
///
/// XNNPACK must be successfully initialized before use.
/// During initialization, XNNPACK populates internal structures depending on host processor. It can be time-consuming.
///
/// @param[in] allocator - structure with function pointers to be used for memory allocation and de-allocation.
///                        If this argument is NULL, system-provided memory management functions (e.g. malloc/free)
///                        will be used.
///
/// @retval xnn_status_success - XNNPACK is successfully initialized and ready to use.
/// @retval xnn_status_out_of_memory - initialization failed due to out-of-memory condition.
/// @retval xnn_status_unsupported_hardware - initialization failed because the host processor does not satisfy the
///                                           minimum hardware requirements for XNNPACK. E.g. this may happen on x86
///                                           processors without SSE2 extension, or on 32-bit ARM processors without
///                                           the NEON SIMD extension.
enum xnn_status xnn_initialize(const struct xnn_allocator* allocator);

/// Deinitialize XNNPACK library.
///
/// To avoid memory and resource leaks, users must call xnn_deinitialize once for each successful xnn_initialize call.
///
/// @retval xnn_status_success - deinitialization call succeeded.
enum xnn_status xnn_deinitialize(void);

/// Subgraph is an abstract representation of a neural network model.
/// Subgraph objects are used to define Values (tensors) and Nodes (operators) comprising the model.
typedef struct xnn_subgraph* xnn_subgraph_t;

/// Create an empty Subgraph object.
///
/// @param external_value_ids - number of Value IDs to reserve for communication with external graph representation.
///                             The Subgraph object would avoid creating internal Value IDs in the
///                             [0, external_value_ids-1] range.
/// @param flags - binary features of the subgraph. No supported flags are currently defined.
/// @param subgraph_out - pointer to the variable that will be initialized with a handle to the Subgraph object upon
///                       successful return.
enum xnn_status xnn_create_subgraph(
  uint32_t external_value_ids,
  uint32_t flags,
  xnn_subgraph_t* subgraph_out);

/// Destroy a Subgraph object, as well as Values, and Nodes associated with the subgraph.
///
/// @param subgraph - the Subgraph object to destroy.
159 enum xnn_status xnn_delete_subgraph( 160 xnn_subgraph_t subgraph); 161 162 #define XNN_VALUE_FLAG_EXTERNAL_INPUT 0x00000001 163 #define XNN_VALUE_FLAG_EXTERNAL_OUTPUT 0x00000002 164 165 #define XNN_INVALID_VALUE_ID UINT32_MAX 166 167 /// Type of elements in a Value object. 168 enum xnn_datatype { 169 /// Invalid data type. Valid Values never have this datatype. 170 xnn_datatype_invalid = 0, 171 /// IEEE754 single-precision floating-point. 172 xnn_datatype_fp32 = 1, 173 /// IEEE754 half-precision floating-point. 174 xnn_datatype_fp16 = 2, 175 }; 176 177 /// Define a tensor-type Value and add it to a Subgraph. 178 /// 179 /// @param subgraph - a Subgraph object that will own the created Value. 180 /// @param datatype - type of the tensor elements. 181 /// @param num_dims - number of dimensions in the shape. 182 /// @param dims - pointer to an array of @a num_dims shape dimensions. If num_dims is 0, this pointer can be NULL. 183 /// XNNPACK does not keep any pointers to this array after the function returns. 184 /// @param data - pointer to static data used for tensor initialization. If the tensor is not statically initialized, 185 /// this pointer must be is NULL. If non-NULL, the life-time of the static data must exceed the life-time 186 /// of the Subgraph object, and of any Runtime objects created from the Subgraph. 187 /// @param external_id - external ID for the Value. The ID must be within the range of reversed Value IDs specified on 188 /// the Subgraph creation. If the external ID is XNN_INVALID_VALUE_ID, an internal ID will be 189 /// created for the Value. 190 /// @param flags - binary features of the Value. Supported values are any combination of XNN_VALUE_FLAG_EXTERNAL_INPUT 191 /// and XNN_VALUE_FLAG_EXTERNAL_OUTPUT. 192 /// @param id_out - pointer to the variable that will be initialized with the Value ID upon successful return. If a 193 /// valid @a external_id was provided, the variable will be initialized with the @a external_id value. 
194 enum xnn_status xnn_define_tensor_value( 195 xnn_subgraph_t subgraph, 196 enum xnn_datatype datatype, 197 size_t num_dims, 198 const size_t* dims, 199 const void* data, 200 uint32_t external_id, 201 uint32_t flags, 202 uint32_t* id_out); 203 204 /// Define a 2D Convolution Node and add it to a Subgraph. 205 /// 206 /// @param subgraph - a Subgraph object that will own the created Node. 207 /// @param input_padding_top - implicit zero-padding above 2D input data. Must be 0 if XNN_FLAG_TENSORFLOW_SAME_PADDING 208 /// flag is specified. 209 /// @param input_padding_right - implicit zero-padding to the right of 2D input data. Must be 0 if 210 /// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified. 211 /// @param input_padding_bottom - implicit zero-padding below 2D input data. Must be 0 if 212 /// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified. 213 /// @param input_padding_left - implicit zero-padding to the left of 2D input data. Must be 0 if 214 /// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified. 215 /// @param kernel_height - kernel (filter) height. 216 /// @param kernel_width - kernel (filter) width. 217 /// @param subsampling_height - height of subsampling region for convolution output (convolution height stride). 218 /// @param subsampling_width - width of subsampling region for convolution output (convolution width stride). 219 /// @param dilation_height - dilation of kernel elements along the height dimension. 220 /// @param dilation_width - dilation of kernel elements along the width dimension. 221 /// @param groups - number of convolution groups. 222 /// @param group_input_channels - number of input channels per group. 223 /// @param group_output_channels - number of output channels per group. 224 /// @param output_min - lower bound for clipping output values. 225 /// @param output_max - upper bound for clipping output values. 226 /// @param input_id - Value ID for the input tensor. 
The input tensor must be a 4D tensor defined in the @a subgraph 227 /// with [N, IH, IW, groups * group_input_channels] dimensions 228 /// @param filter_id - Value ID for the filter tensor. The filter tensor must ge a 4D tensor defined in the @a subgraph 229 /// with [groups * group_output_channels, kernel_height, kernel_width, group_input_channels] 230 /// dimensions. 231 /// @param bias_id - Value ID for the bias tensor. The bias tensor must be a 1D tensor defined in the @a subgraph with 232 /// [groups * group_output_channels] dimensions. 233 /// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph 234 /// with [N, OH, OW, groups * group_output_channels] dimensions. 235 /// @param flags - binary features of the 2D Convolution Node. The only currently supported values is 236 /// XNN_FLAG_TENSORFLOW_SAME_PADDING. 237 enum xnn_status xnn_define_convolution_2d( 238 xnn_subgraph_t subgraph, 239 uint32_t input_padding_top, 240 uint32_t input_padding_right, 241 uint32_t input_padding_bottom, 242 uint32_t input_padding_left, 243 uint32_t kernel_height, 244 uint32_t kernel_width, 245 uint32_t subsampling_height, 246 uint32_t subsampling_width, 247 uint32_t dilation_height, 248 uint32_t dilation_width, 249 uint32_t groups, 250 size_t group_input_channels, 251 size_t group_output_channels, 252 float output_min, 253 float output_max, 254 uint32_t input_id, 255 uint32_t filter_id, 256 uint32_t bias_id, 257 uint32_t output_id, 258 uint32_t flags); 259 260 /// Define a 2D Deconvolution (Transposed Convolution) Node and add it to a Subgraph. 261 /// 262 /// @param subgraph - a Subgraph object that will own the created Node. 263 /// @param padding_top - implicit padding above 2D output data. 264 /// @param padding_right - implicit padding to the right of 2D output data. 265 /// @param padding_bottom - implicit padding below 2D output data. 266 /// @param padding_left - implicit padding to the left of 2D output data. 
267 /// @param adjustment_height - additional elements in the bottom of the 2D output data. 268 /// @param adjustment_width - additional elements to the right of the 2D output data. 269 /// @param kernel_height - kernel (filter) height. 270 /// @param kernel_width - kernel (filter) width. 271 /// @param upsampling_height - height of upsampling region for deconvolution input (deconvolution height stride). 272 /// @param upsampling_width - width of upsampling region for deconvolution input (deconvolution width stride). 273 /// @param dilation_height - dilation of kernel elements along the height dimension. 274 /// @param dilation_width - dilation of kernel elements along the width dimension. 275 /// @param groups - number of convolution groups. 276 /// @param group_input_channels - number of input channels per group. 277 /// @param group_output_channels - number of output channels per group. 278 /// @param output_min - lower bound for clipping output values. 279 /// @param output_max - upper bound for clipping output values. 280 /// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph 281 /// with [N, IH, IW, groups * group_input_channels] dimensions 282 /// @param filter_id - Value ID for the filter tensor. The filter tensor must ge a 4D tensor defined in the @a subgraph 283 /// with [groups * group_output_channels, kernel_height, kernel_width, group_input_channels] 284 /// dimensions. 285 /// @param bias_id - Value ID for the bias tensor. The bias tensor must be a 1D tensor defined in the @a subgraph with 286 /// [groups * group_output_channels] dimensions. 287 /// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph 288 /// with [N, OH, OW, groups * group_output_channels] dimensions. 289 /// @param flags - binary features of the 2D Deconvolution Node. No supported flags are currently defined. 
290 enum xnn_status xnn_define_deconvolution_2d( 291 xnn_subgraph_t subgraph, 292 uint32_t padding_top, 293 uint32_t padding_right, 294 uint32_t padding_bottom, 295 uint32_t padding_left, 296 uint32_t adjustment_height, 297 uint32_t adjustment_width, 298 uint32_t kernel_height, 299 uint32_t kernel_width, 300 uint32_t upsampling_height, 301 uint32_t upsampling_width, 302 uint32_t dilation_height, 303 uint32_t dilation_width, 304 uint32_t groups, 305 size_t group_input_channels, 306 size_t group_output_channels, 307 float output_min, 308 float output_max, 309 uint32_t input_id, 310 uint32_t filter_id, 311 uint32_t bias_id, 312 uint32_t output_id, 313 uint32_t flags); 314 315 /// Define a 2D Depthwise Convolution Node and add it to a Subgraph. 316 /// 317 /// @param subgraph - a Subgraph object that will own the created Node. 318 /// @param input_padding_top - implicit zero-padding above 2D input data. Must be 0 if XNN_FLAG_TENSORFLOW_SAME_PADDING 319 /// flag is specified. 320 /// @param input_padding_right - implicit zero-padding to the right of 2D input data. Must be 0 if 321 /// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified. 322 /// @param input_padding_bottom - implicit zero-padding below 2D input data. Must be 0 if 323 /// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified. 324 /// @param input_padding_left - implicit zero-padding to the left of 2D input data. Must be 0 if 325 /// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified. 326 /// @param kernel_height - kernel (filter) height. 327 /// @param kernel_width - kernel (filter) width. 328 /// @param subsampling_height - height of subsampling region for convolution output (convolution height stride). 329 /// @param subsampling_width - width of subsampling region for convolution output (convolution width stride). 330 /// @param dilation_height - dilation of kernel elements along the height dimension. 331 /// @param dilation_width - dilation of kernel elements along the width dimension. 
332 /// @param depth_multiplier - ratio of output channels to input channels. 333 /// @param input_channels - number of input channels. 334 /// @param output_min - lower bound for clipping output values. 335 /// @param output_max - upper bound for clipping output values. 336 /// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph 337 /// with [N, IH, IW, input_channels] dimensions 338 /// @param filter_id - Value ID for the filter tensor. The filter tensor must ge a 4D tensor defined in the @a subgraph 339 /// with [1, kernel_height, kernel_width, input_channels * depth_multiplier] dimensions. 340 /// @param bias_id - Value ID for the bias tensor. The bias tensor must be a 1D tensor defined in the @a subgraph with 341 /// [input_channels * depth_multiplier] dimensions. 342 /// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph 343 /// with [N, OH, OW, input_channels * depth_multiplier] dimensions. 344 /// @param flags - binary features of the 2D Depthwise Convolution Node. The only currently supported values is 345 /// XNN_FLAG_TENSORFLOW_SAME_PADDING. 346 enum xnn_status xnn_define_depthwise_convolution_2d( 347 xnn_subgraph_t subgraph, 348 uint32_t input_padding_top, 349 uint32_t input_padding_right, 350 uint32_t input_padding_bottom, 351 uint32_t input_padding_left, 352 uint32_t kernel_height, 353 uint32_t kernel_width, 354 uint32_t subsampling_height, 355 uint32_t subsampling_width, 356 uint32_t dilation_height, 357 uint32_t dilation_width, 358 uint32_t depth_multiplier, 359 size_t input_channels, 360 float output_min, 361 float output_max, 362 uint32_t input_id, 363 uint32_t filter_id, 364 uint32_t bias_id, 365 uint32_t output_id, 366 uint32_t flags); 367 368 /// Define a DepthToSpace Node and add it to a Subgraph. 
369 /// 370 /// The DepthToSpace Node rearranges data from depth into blocks of spatial data (a reverse transform for SpaceToDepth). 371 /// For a given input pixel, an output square of pixels with side @a block_size is formed from values in the corresponding 372 /// number of its channels. The output depth is therefore @a block_size x @a block_size times smaller than that of the input. 373 /// 374 /// @param subgraph - a Subgraph object that will own the created Node. 375 /// @param input_id - Value ID for the input tensor. The input tensor must be divisible by @a block_size in the channel dimension. 376 /// @param output_id - Value ID for the output tensor. 377 /// @param block_size - the size of the spatial block. 378 /// @param flags - binary features of the DepthToSpace Node. No supported flags are currently defined. 379 enum xnn_status xnn_define_depth_to_space( 380 xnn_subgraph_t subgraph, 381 uint32_t input_id, 382 uint32_t output_id, 383 uint32_t block_size, 384 uint32_t flags); 385 386 /// Define a 2D Global Average Pooling Node and add it to a Subgraph. 387 /// 388 /// @param subgraph - a Subgraph object that will own the created Node. 389 /// @param output_min - lower bound for clipping output values. 390 /// @param output_max - upper bound for clipping output values. 391 /// @param input_id - Value ID for the input tensor. The input tensor must be a 392 /// 4D tensor defined in the @a subgraph with [N, H, W, C] 393 /// dimensions 394 /// @param output_id - Value ID for the output tensor. The output tensor must be 395 /// a 4D tensor defined in the @a subgraph with [N, 1, 1, C] 396 /// dimensions. 397 /// @param flags - binary features of the 2D Global Average Pooling Node. No 398 /// supported flags are currently defined. 
399 enum xnn_status xnn_define_global_average_pooling_2d( 400 xnn_subgraph_t subgraph, 401 float output_min, 402 float output_max, 403 uint32_t input_id, 404 uint32_t output_id, 405 uint32_t flags); 406 407 /// Define a 2D Average Pooling Node and add it to a Subgraph. 408 /// 409 /// @param subgraph - a Subgraph object that will own the created Node. 410 /// @param input_padding_top - implicit zero-padding above 2D input data. Must be 0 if XNN_FLAG_TENSORFLOW_SAME_PADDING 411 /// flag is specified. 412 /// @param input_padding_right - implicit zero-padding to the right of 2D input data. Must be 0 if 413 /// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified. 414 /// @param input_padding_bottom - implicit zero-padding below 2D input data. Must be 0 if 415 /// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified. 416 /// @param input_padding_left - implicit zero-padding to the left of 2D input data. Must be 0 if 417 /// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified. 418 /// @param pooling_height - pooling (kernel) height. 419 /// @param pooling_width - pooling (kernel) width. 420 /// @param stride_height - displacing of the pooling window in the vertical dimension of the input pixels corresponding 421 /// to vertically adjacent output pixels. 422 /// @param stride_width - displacing of the pooling window in the horizontal dimension of the input pixels corresponding 423 /// to horizontally adjacent output pixels. 424 /// @param output_min - lower bound for clipping output values. 425 /// @param output_max - upper bound for clipping output values. 426 /// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph 427 /// with [N, IH, IW, channels] dimensions 428 /// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph 429 /// with [N, OH, OW, channels] dimensions. 430 /// @param flags - binary features of the 2D Average Pooling Node. 
The only currently supported values is 431 /// XNN_FLAG_TENSORFLOW_SAME_PADDING. 432 enum xnn_status xnn_define_average_pooling_2d( 433 xnn_subgraph_t subgraph, 434 uint32_t input_padding_top, 435 uint32_t input_padding_right, 436 uint32_t input_padding_bottom, 437 uint32_t input_padding_left, 438 uint32_t pooling_height, 439 uint32_t pooling_width, 440 uint32_t stride_height, 441 uint32_t stride_width, 442 float output_min, 443 float output_max, 444 uint32_t input_id, 445 uint32_t output_id, 446 uint32_t flags); 447 448 /// Define a Fully Connected Node and add it to a Subgraph. 449 /// 450 /// @param subgraph - a Subgraph object that will own the created Node. 451 /// @param output_min - lower bound for clipping output values. 452 /// @param output_max - upper bound for clipping output values. 453 /// @param input_id - Value ID for the input tensor. The input tensor must be an 454 /// N-dimensional tensor defined in the @a 455 /// subgraph. 456 /// If XNN_FLAG_TENSORFLOW_RESHAPE_2D is not specified, the 457 /// input tensor must be at least 1D and its last dimension 458 /// must match the last dimension of the filter tensor. In 459 /// particular, if input is a 2D tensor, it must have 460 /// [batch_size, input_channels] dimensions. If 461 /// XNN_FLAG_TENSORFLOW_RESHAPE_2D is specified, the number of 462 /// elements in the input tensor must be divisible by the 463 /// input_channels. The tensor will be first flattened into a 464 /// 1D tensor of [num_input_elements] dimensions, then 465 /// reshaped into a 2D tensor of [num_input_elements / 466 /// input_channels, input_channels] dimensions where 467 /// num_input_elements is the total number of elements in the 468 /// input tensor. 469 /// @param filter_id - Value ID for the filter tensor. The filter tensor must ge 470 /// a 2D tensor defined in the @a subgraph 471 /// with [output_channels, input_channels] dimensions. 472 /// @param bias_id - Value ID for the bias tensor. 
The bias tensor must be a 1D 473 /// tensor defined in the @a subgraph with 474 /// [output_channels] dimensions. 475 /// @param output_id - Value ID for the output tensor. The output tensor must be 476 /// defined in the @a subgraph. 477 /// If XNN_FLAG_TENSORFLOW_RESHAPE_2D is not specified, the 478 /// output tensor must have the same dimensionality as the 479 /// input tensor, all its dimensions but the last one must 480 /// match the corresponding dimensions of the input tensor, 481 /// and the last dimensions of the output tensor must match 482 /// the first dimension of the filter tensor. In particular, 483 /// if input is a 2D tensor, output must be a 2D tensor of 484 /// [batch_size, output_channels] dimensions. If 485 /// XNN_FLAG_TENSORFLOW_RESHAPE_2D is specified, output must 486 /// be a 2D tensor of [num_input_elements / input_channels, 487 /// output_channels] dimensions where num_input_elements is 488 /// the total number of elements in the input tensor. 489 /// @param flags - binary features of the Fully Connected Node. The only 490 /// currently supported value is XNN_FLAG_TENSORFLOW_RESHAPE_2D. 491 enum xnn_status xnn_define_fully_connected(xnn_subgraph_t subgraph, 492 float output_min, float output_max, 493 uint32_t input_id, 494 uint32_t filter_id, uint32_t bias_id, 495 uint32_t output_id, uint32_t flags); 496 497 /// Define a 2D Max Pooling Node and add it to a Subgraph. 498 /// 499 /// @param subgraph - a Subgraph object that will own the created Node. 500 /// @param input_padding_top - implicit zero-padding above 2D input data. Must be 0 if XNN_FLAG_TENSORFLOW_SAME_PADDING 501 /// flag is specified. 502 /// @param input_padding_right - implicit zero-padding to the right of 2D input data. Must be 0 if 503 /// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified. 504 /// @param input_padding_bottom - implicit zero-padding below 2D input data. Must be 0 if 505 /// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified. 
506 /// @param input_padding_left - implicit zero-padding to the left of 2D input data. Must be 0 if 507 /// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified. 508 /// @param pooling_height - pooling (kernel) height. 509 /// @param pooling_width - pooling (kernel) width. 510 /// @param stride_height - displacing of the pooling window in the vertical dimension of the input pixels corresponding 511 /// to vertically adjacent output pixels. 512 /// @param stride_width - displacing of the pooling window in the horizontal dimension of the input pixels corresponding 513 /// to horizontally adjacent output pixels. 514 /// @param dilation_height - dilation of pooling elements along the height dimension. 515 /// @param dilation_width - dilation of pooling elements along the width dimension. 516 /// @param output_min - lower bound for clipping output values. 517 /// @param output_max - upper bound for clipping output values. 518 /// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph 519 /// with [N, IH, IW, channels] dimensions 520 /// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph 521 /// with [N, OH, OW, channels] dimensions. 522 /// @param flags - binary features of the 2D Max Pooling Node. The only currently supported values is 523 /// XNN_FLAG_TENSORFLOW_SAME_PADDING. 524 enum xnn_status xnn_define_max_pooling_2d( 525 xnn_subgraph_t subgraph, 526 uint32_t input_padding_top, 527 uint32_t input_padding_right, 528 uint32_t input_padding_bottom, 529 uint32_t input_padding_left, 530 uint32_t pooling_height, 531 uint32_t pooling_width, 532 uint32_t stride_height, 533 uint32_t stride_width, 534 uint32_t dilation_height, 535 uint32_t dilation_width, 536 float output_min, 537 float output_max, 538 uint32_t input_id, 539 uint32_t output_id, 540 uint32_t flags); 541 542 /// Define a 2D ArgMax Pooling Node and add it to a Subgraph. 
543 /// 544 /// @param subgraph - a Subgraph object that will own the created Node. 545 /// @param input_padding_top - implicit zero-padding above 2D input data. 546 /// @param input_padding_right - implicit zero-padding to the right of 2D input data. 547 /// @param input_padding_bottom - implicit zero-padding below 2D input data. 548 /// @param input_padding_left - implicit zero-padding to the left of 2D input data. 549 /// @param pooling_height - pooling (kernel) height. Vertical stride between pooling regions match this value. 550 /// @param pooling_width - pooling (kernel) width. Horizontal stride between pooling regions match this value. 551 /// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph 552 /// with [N, IH, IW, channels] dimensions 553 /// @param output_value_id - Value ID for the output tensor with the maximum values in the pools. The output tensor must 554 /// be a 4D tensor defined in the @a subgraph with [N, OH, OW, channels] dimensions. 555 /// @param output_index_id - Value ID for the output tensor with the indexes of the maximum values in the pools. The 556 /// output tensor must be a 4D tensor defined in the @a subgraph with [N, OH, OW, channels] 557 /// dimensions. 558 /// @param flags - binary features of the 2D ArgMax Pooling Node. No supported flags are currently defined. 559 enum xnn_status xnn_define_argmax_pooling_2d( 560 xnn_subgraph_t subgraph, 561 uint32_t input_padding_top, 562 uint32_t input_padding_right, 563 uint32_t input_padding_bottom, 564 uint32_t input_padding_left, 565 uint32_t pooling_height, 566 uint32_t pooling_width, 567 uint32_t input_id, 568 uint32_t output_value_id, 569 uint32_t output_index_id, 570 uint32_t flags); 571 572 /// Define a 2D UnPooling Node and add it to a Subgraph. 573 /// 574 /// @param subgraph - a Subgraph object that will own the created Node. 575 /// @param padding_top - implicit padding above 2D output data. 
576 /// @param padding_right - implicit padding to the right of 2D output data. 577 /// @param padding_bottom - implicit padding below 2D output data. 578 /// @param padding_left - implicit padding to the left of 2D output data. 579 /// @param pooling_height - height of the pooling window. 580 /// @param pooling_width - width of the pooling window. 581 /// @param input_value_id - Value ID for the input tensor with the max-pooling values to invert. The input value tensor 582 /// must be a 4D tensor defined in the @a subgraph with [N, IH, IW, channels] dimensions. 583 /// @param input_index_id - Value ID for the input tensor with the indices of the per-pool maximum values produced by 584 /// a 2D UnPooling Node. The input tensor must be a 4D tensor defined in the @a subgraph with 585 /// [N, IH, IW, channels] dimensions. 586 /// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph 587 /// with [N, OH, OW, channels] dimensions. 588 /// @param flags - binary features of the 2D UnPooling Node. No supported flags are currently defined. 589 enum xnn_status xnn_define_unpooling_2d( 590 xnn_subgraph_t subgraph, 591 uint32_t padding_top, 592 uint32_t padding_right, 593 uint32_t padding_bottom, 594 uint32_t padding_left, 595 uint32_t pooling_height, 596 uint32_t pooling_width, 597 uint32_t input_value_id, 598 uint32_t input_index_id, 599 uint32_t output_id, 600 uint32_t flags); 601 602 /// Define a 2-Input Add Node and add it to a Subgraph. 603 /// 604 /// The 2-Input Add Node computes elementwise addition of two tensor inputs with numpy broadcasting rules. 605 /// 606 /// @param subgraph - a Subgraph object that will own the created Node. 607 /// @param output_min - lower bound for clipping output values. 608 /// @param output_max - upper bound for clipping output values. 609 /// @param input1_id - Value ID for the first input tensor. 
The input tensor must be an N-dimensional tensor defined in 610 /// the @a subgraph with each dimension either equal to the corresponding dimension of the second 611 /// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along 612 /// that dimension. 613 /// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in 614 /// the @a subgraph with each dimension either equal to the corresponding dimension of the first 615 /// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along 616 /// that dimension. 617 /// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined 618 /// in the @a subgraph with each dimension equal to the maximum between the corresponding dimension 619 /// of the two inputs. 620 /// @param flags - binary features of the Add Node. No supported flags are currently defined. 621 enum xnn_status xnn_define_add2( 622 xnn_subgraph_t subgraph, 623 float output_min, 624 float output_max, 625 uint32_t input1_id, 626 uint32_t input2_id, 627 uint32_t output_id, 628 uint32_t flags); 629 630 /// Define a 2-Input Multiply Node and add it to a Subgraph. 631 /// 632 /// The 2-Input Multiply Node computes elementwise multiplication of two tensor inputs with numpy broadcasting rules. 633 /// 634 /// @param subgraph - a Subgraph object that will own the created Node. 635 /// @param output_min - lower bound for clipping output values. 636 /// @param output_max - upper bound for clipping output values. 637 /// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in 638 /// the @a subgraph with each dimension either equal to the corresponding dimension of the second 639 /// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along 640 /// that dimension. 
641 /// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in 642 /// the @a subgraph with each dimension either equal to the corresponding dimension of the first 643 /// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along 644 /// that dimension. 645 /// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined 646 /// in the @a subgraph with each dimension equal to the maximum between the corresponding dimension 647 /// of the two inputs. 648 /// @param flags - binary features of the Multiply Node. No supported flags are currently defined. 649 enum xnn_status xnn_define_multiply2( 650 xnn_subgraph_t subgraph, 651 float output_min, 652 float output_max, 653 uint32_t input1_id, 654 uint32_t input2_id, 655 uint32_t output_id, 656 uint32_t flags); 657 658 /// Define a Subtract Node and add it to a Subgraph. 659 /// 660 /// The Subtract Node computes elementwise subtraction of two tensor inputs with numpy broadcasting rules. 661 /// 662 /// @param subgraph - a Subgraph object that will own the created Node. 663 /// @param output_min - lower bound for clipping output values. 664 /// @param output_max - upper bound for clipping output values. 665 /// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in 666 /// the @a subgraph with each dimension either equal to the corresponding dimension of the second 667 /// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along 668 /// that dimension. 669 /// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in 670 /// the @a subgraph with each dimension either equal to the corresponding dimension of the first 671 /// input, or equal to 1. 
In the latter case, the elements of the input tensor are broadcasted along 672 /// that dimension. 673 /// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined 674 /// in the @a subgraph with each dimension equal to the maximum between the corresponding dimension 675 /// of the two inputs. 676 /// @param flags - binary features of the Subtract Node. No supported flags are currently defined. 677 enum xnn_status xnn_define_subtract( 678 xnn_subgraph_t subgraph, 679 float output_min, 680 float output_max, 681 uint32_t input1_id, 682 uint32_t input2_id, 683 uint32_t output_id, 684 uint32_t flags); 685 686 /// Define a Divide Node and add it to a Subgraph. 687 /// 688 /// The Divide Node computes elementwise division of two tensor inputs with numpy broadcasting rules. 689 /// 690 /// @param subgraph - a Subgraph object that will own the created Node. 691 /// @param output_min - lower bound for clipping output values. 692 /// @param output_max - upper bound for clipping output values. 693 /// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in 694 /// the @a subgraph with each dimension either equal to the corresponding dimension of the second 695 /// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along 696 /// that dimension. 697 /// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in 698 /// the @a subgraph with each dimension either equal to the corresponding dimension of the first 699 /// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along 700 /// that dimension. 701 /// @param output_id - Value ID for the output tensor. 
The output tensor must be a max(N,M)-dimensional tensor defined 702 /// in the @a subgraph with each dimension equal to the maximum between the corresponding dimension 703 /// of the two inputs. 704 /// @param flags - binary features of the Divide Node. No supported flags are currently defined. 705 enum xnn_status xnn_define_divide( 706 xnn_subgraph_t subgraph, 707 float output_min, 708 float output_max, 709 uint32_t input1_id, 710 uint32_t input2_id, 711 uint32_t output_id, 712 uint32_t flags); 713 714 /// Define a 2-Input Maximum Node and add it to a Subgraph. 715 /// 716 /// The 2-Input Maximum Node computes elementwise maximum of two tensor inputs with numpy broadcasting rules. 717 /// 718 /// @param subgraph - a Subgraph object that will own the created Node. 719 /// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in 720 /// the @a subgraph with each dimension either equal to the corresponding dimension of the second 721 /// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along 722 /// that dimension. 723 /// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in 724 /// the @a subgraph with each dimension either equal to the corresponding dimension of the first 725 /// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along 726 /// that dimension. 727 /// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined 728 /// in the @a subgraph with each dimension equal to the maximum between the corresponding dimension 729 /// of the two inputs. 730 /// @param flags - binary features of the Maximum Node. No supported flags are currently defined. 
731 enum xnn_status xnn_define_maximum2( 732 xnn_subgraph_t subgraph, 733 uint32_t input1_id, 734 uint32_t input2_id, 735 uint32_t output_id, 736 uint32_t flags); 737 738 /// Define a 2-Input Minimum Node and add it to a Subgraph. 739 /// 740 /// The 2-Input Minimum Node computes elementwise minimum of two tensor inputs with numpy broadcasting rules. 741 /// 742 /// @param subgraph - a Subgraph object that will own the created Node. 743 /// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in 744 /// the @a subgraph with each dimension either equal to the corresponding dimension of the second 745 /// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along 746 /// that dimension. 747 /// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in 748 /// the @a subgraph with each dimension either equal to the corresponding dimension of the first 749 /// input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along 750 /// that dimension. 751 /// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined 752 /// in the @a subgraph with each dimension equal to the maximum between the corresponding dimension 753 /// of the two inputs. 754 /// @param flags - binary features of the Minimum Node. No supported flags are currently defined. 755 enum xnn_status xnn_define_minimum2( 756 xnn_subgraph_t subgraph, 757 uint32_t input1_id, 758 uint32_t input2_id, 759 uint32_t output_id, 760 uint32_t flags); 761 762 /// Define a Squared Difference Node and add it to a Subgraph. 763 /// 764 /// The Squared Difference Node computes elementwise squared difference of two tensor inputs with numpy broadcasting 765 /// rules. 766 /// 767 /// @param subgraph - a Subgraph object that will own the created Node. 
768 /// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in 769 ///                     the @a subgraph with each dimension either equal to the corresponding dimension of the second 770 ///                     input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along 771 ///                     that dimension. 772 /// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in 773 ///                     the @a subgraph with each dimension either equal to the corresponding dimension of the first 774 ///                     input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along 775 ///                     that dimension. 776 /// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined 777 ///                    in the @a subgraph with each dimension equal to the maximum between the corresponding dimension 778 ///                    of the two inputs. 779 /// @param flags - binary features of the Squared Difference Node. No supported flags are currently defined. 780 enum xnn_status xnn_define_squared_difference( 781   xnn_subgraph_t subgraph, 782   uint32_t input1_id, 783   uint32_t input2_id, 784   uint32_t output_id, 785   uint32_t flags); 786  787 /// Define a Constant Pad Node with static padding specification and add it to a Subgraph. 788 /// 789 /// @param subgraph - a Subgraph object that will own the created Node. 790 /// @param pre_paddings - number of padding elements to insert before input elements for every dimension. This array 791 ///                       must have as many elements as the number of dimensions in the input tensor. 792 /// @param post_paddings - number of padding elements to insert after input elements for every dimension. This array 793 ///                        must have as many elements as the number of dimensions in the input tensor. 794 /// @param padding_value - constant value used to initialize padding elements. 795 /// @param input_id - Value ID for the input tensor.
The input tensor must be defined in the @a subgraph. 796 /// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its 797 ///                    shape must match the shape of the input tensor with padding. 798 /// @param flags - binary features of the Constant Pad Node. No supported flags are currently defined. 799 enum xnn_status xnn_define_static_constant_pad( 800   xnn_subgraph_t subgraph, 801   const size_t* pre_paddings, 802   const size_t* post_paddings, 803   float padding_value, 804   uint32_t input_id, 805   uint32_t output_id, 806   uint32_t flags); 807  808 /// Define a Reshape Node with static shape specification and add it to a Subgraph. 809 /// 810 /// @param subgraph - a Subgraph object that will own the created Node. 811 /// @param num_dims - number of shape dimensions in the output tensor. 812 /// @param new_shape - shape dimensions of the output tensor. 813 /// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph. 814 /// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its 815 ///                    shape must match the shape specified in @a new_shape. 816 /// @param flags - binary features of the Reshape Node. No supported flags are currently defined. 817 enum xnn_status xnn_define_static_reshape( 818   xnn_subgraph_t subgraph, 819   size_t num_dims, 820   const size_t* new_shape, 821   uint32_t input_id, 822   uint32_t output_id, 823   uint32_t flags); 824  825 /// Define a 2D Resize Bilinear Node with static output height & width specification and add it to a Subgraph. 826 /// 827 /// @param subgraph - a Subgraph object that will own the created Node. 828 /// @param new_height - height dimension of the output tensor. 829 /// @param new_width - width dimension of the output tensor. 830 /// @param input_id - Value ID for the input tensor.
The input tensor must be a 4D tensor defined in the @a subgraph 831 ///                    with [N, H, W, C] dimensions. 832 /// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph 833 ///                    with [N, new_height, new_width, C] dimensions. 834 /// @param flags - binary features of the 2D Resize Bilinear Node. The only currently supported values are 835 ///                XNN_FLAG_TENSORFLOW_LEGACY_MODE and XNN_FLAG_ALIGN_CORNERS, which are mutually exclusive. 836 enum xnn_status xnn_define_static_resize_bilinear_2d( 837   xnn_subgraph_t subgraph, 838   size_t new_height, 839   size_t new_width, 840   uint32_t input_id, 841   uint32_t output_id, 842   uint32_t flags); 843  844 /// Define a PReLU (Parametric ReLU) Node and add it to a Subgraph. 845 /// 846 /// @param subgraph - a Subgraph object that will own the created Node. 847 /// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph 848 ///                   with [N, H, W, channels] dimensions. 849 /// @param slope_id - Value ID for the slope tensor. The slope tensor must be a 1D tensor defined in the @a subgraph with 850 ///                   [channels] dimensions. 851 /// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph 852 ///                    with [N, H, W, channels] dimensions. 853 /// @param flags - binary features of the PReLU Node. No supported flags are currently defined. 854 enum xnn_status xnn_define_prelu( 855   xnn_subgraph_t subgraph, 856   uint32_t input_id, 857   uint32_t slope_id, 858   uint32_t output_id, 859   uint32_t flags); 860  861 /// Define an Abs Node and add it to a Subgraph. 862 /// 863 /// @param subgraph - a Subgraph object that will own the created Node. 864 /// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph. 865 /// @param output_id - Value ID for the output tensor.
The output tensor must be defined in the @a subgraph, and its 866 /// shape must match the shape of the input tensor. 867 /// @param flags - binary features of the Abs Node. No supported flags are currently defined. 868 enum xnn_status xnn_define_abs( 869 xnn_subgraph_t subgraph, 870 uint32_t input_id, 871 uint32_t output_id, 872 uint32_t flags); 873 874 /// Define a Bankers' Rounding Node and add it to a Subgraph. 875 /// 876 /// @param subgraph - a Subgraph object that will own the created Node. 877 /// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph. 878 /// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its 879 /// shape must match the shape of the input tensor. 880 /// @param flags - binary features of the Bankers' Rounding Node. No supported flags are currently defined. 881 enum xnn_status xnn_define_bankers_rounding( 882 xnn_subgraph_t subgraph, 883 uint32_t input_id, 884 uint32_t output_id, 885 uint32_t flags); 886 887 /// Define a Ceiling Node and add it to a Subgraph. 888 /// 889 /// @param subgraph - a Subgraph object that will own the created Node. 890 /// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph. 891 /// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its 892 /// shape must match the shape of the input tensor. 893 /// @param flags - binary features of the Ceiling Node. No supported flags are currently defined. 894 enum xnn_status xnn_define_ceiling( 895 xnn_subgraph_t subgraph, 896 uint32_t input_id, 897 uint32_t output_id, 898 uint32_t flags); 899 900 /// Define a Clamp Node and add it to a Subgraph. 901 /// 902 /// @param subgraph - a Subgraph object that will own the created Node. 903 /// @param output_min - lower bound for clipping output values. 904 /// @param output_max - upper bound for clipping output values. 
905 /// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph. 906 /// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its 907 /// shape must match the shape of the input tensor. 908 /// @param flags - binary features of the Clamp Node. No supported flags are currently defined. 909 enum xnn_status xnn_define_clamp( 910 xnn_subgraph_t subgraph, 911 float output_min, 912 float output_max, 913 uint32_t input_id, 914 uint32_t output_id, 915 uint32_t flags); 916 917 /// Define an ELU (Exponential Linear Unit) Node and add it to a Subgraph. 918 /// 919 /// @param subgraph - a Subgraph object that will own the created Node. 920 /// @param alpha - scale factor for negative output elements. 921 /// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph. 922 /// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its 923 /// shape must match the shape of the input tensor. 924 /// @param flags - binary features of the ELU Node. No supported flags are currently defined. 925 enum xnn_status xnn_define_elu( 926 xnn_subgraph_t subgraph, 927 float alpha, 928 uint32_t input_id, 929 uint32_t output_id, 930 uint32_t flags); 931 932 /// Define a Floor Node and add it to a Subgraph. 933 /// 934 /// @param subgraph - a Subgraph object that will own the created Node. 935 /// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph. 936 /// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its 937 /// shape must match the shape of the input tensor. 938 /// @param flags - binary features of the Floor Node. No supported flags are currently defined. 
939 enum xnn_status xnn_define_floor( 940 xnn_subgraph_t subgraph, 941 uint32_t input_id, 942 uint32_t output_id, 943 uint32_t flags); 944 945 /// Define a HardSwish Node and add it to a Subgraph. 946 /// 947 /// @param subgraph - a Subgraph object that will own the created Node. 948 /// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph. 949 /// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its 950 /// shape must match the shape of the input tensor. 951 /// @param flags - binary features of the HardSwish Node. No supported flags are currently defined. 952 enum xnn_status xnn_define_hardswish( 953 xnn_subgraph_t subgraph, 954 uint32_t input_id, 955 uint32_t output_id, 956 uint32_t flags); 957 958 /// Define a Leaky ReLU Node and add it to a Subgraph. 959 /// 960 /// @param subgraph - a Subgraph object that will own the created Node. 961 /// @param negative_slope - scale factor for negative input elements. 962 /// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph. 963 /// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its 964 /// shape must match the shape of the input tensor. 965 /// @param flags - binary features of the Leaky ReLU Node. No supported flags are currently defined. 966 enum xnn_status xnn_define_leaky_relu( 967 xnn_subgraph_t subgraph, 968 float negative_slope, 969 uint32_t input_id, 970 uint32_t output_id, 971 uint32_t flags); 972 973 /// Define a Negate Node and add it to a Subgraph. 974 /// 975 /// @param subgraph - a Subgraph object that will own the created Node. 976 /// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph. 977 /// @param output_id - Value ID for the output tensor. 
The output tensor must be defined in the @a subgraph, and its 978 /// shape must match the shape of the input tensor. 979 /// @param flags - binary features of the Negate Node. No supported flags are currently defined. 980 enum xnn_status xnn_define_negate( 981 xnn_subgraph_t subgraph, 982 uint32_t input_id, 983 uint32_t output_id, 984 uint32_t flags); 985 986 /// Define a Sigmoid Node and add it to a Subgraph. 987 /// 988 /// @param subgraph - a Subgraph object that will own the created Node. 989 /// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph. 990 /// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its 991 /// shape must match the shape of the input tensor. 992 /// @param flags - binary features of the Sigmoid Node. No supported flags are currently defined. 993 enum xnn_status xnn_define_sigmoid( 994 xnn_subgraph_t subgraph, 995 uint32_t input_id, 996 uint32_t output_id, 997 uint32_t flags); 998 999 /// Define a SoftMax Node and add it to a Subgraph. 1000 /// 1001 /// @param subgraph - a Subgraph object that will own the created Node. 1002 /// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph, and have at 1003 /// least one dimension. 1004 /// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its 1005 /// shape must match the shape of the input tensor. 1006 /// @param flags - binary features of the SoftMax Node. No supported flags are currently defined. 1007 enum xnn_status xnn_define_softmax( 1008 xnn_subgraph_t subgraph, 1009 uint32_t input_id, 1010 uint32_t output_id, 1011 uint32_t flags); 1012 1013 /// Define a Square Node and add it to a Subgraph. 1014 /// 1015 /// @param subgraph - a Subgraph object that will own the created Node. 1016 /// @param input_id - Value ID for the input tensor. 
The input tensor must be defined in the @a subgraph. 1017 /// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its 1018 /// shape must match the shape of the input tensor. 1019 /// @param flags - binary features of the Square Node. No supported flags are currently defined. 1020 enum xnn_status xnn_define_square( 1021 xnn_subgraph_t subgraph, 1022 uint32_t input_id, 1023 uint32_t output_id, 1024 uint32_t flags); 1025 1026 /// Define a Square Root Node and add it to a Subgraph. 1027 /// 1028 /// @param subgraph - a Subgraph object that will own the created Node. 1029 /// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph. 1030 /// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its 1031 /// shape must match the shape of the input tensor. 1032 /// @param flags - binary features of the Square Root Node. No supported flags are currently defined. 1033 enum xnn_status xnn_define_square_root( 1034 xnn_subgraph_t subgraph, 1035 uint32_t input_id, 1036 uint32_t output_id, 1037 uint32_t flags); 1038 1039 /// Runtime is a combination of an execution plan for subgraph Nodes and a memory manager for subgraph Values. 1040 typedef struct xnn_runtime* xnn_runtime_t; 1041 1042 /// Create a Runtime object from a subgraph. 1043 /// 1044 /// @param subgraph - a Subgraph object with all Values and Nodes that would be handled by the runtime. No Values or 1045 /// Nodes can be added to the runtime once it is constructed. 1046 /// @param threadpool - the thread pool to be used for parallelisation of computations in the runtime. If the thread 1047 /// pool is NULL, the computation would run on the caller thread without parallelization. 1048 /// @param flags - binary features of the runtime. The only currently supported value is XNN_FLAG_SPARSE_INFERENCE. 
1049 /// @param runtime_out - pointer to the variable that will be initialized with a handle to the Runtime object upon 1050 /// successful return. Once constructed, the Runtime object is independent of the Subgraph object 1051 /// used to create it. 1052 enum xnn_status xnn_create_runtime_v2( 1053 xnn_subgraph_t subgraph, 1054 pthreadpool_t threadpool, 1055 uint32_t flags, 1056 xnn_runtime_t* runtime_out); 1057 1058 enum xnn_status xnn_create_runtime( 1059 xnn_subgraph_t subgraph, 1060 xnn_runtime_t* runtime_out); 1061 1062 struct xnn_external_value { 1063 uint32_t id; 1064 void* data; 1065 }; 1066 1067 /// Setup data pointers for external inputs and outputs in a Runtime object. 1068 /// 1069 /// @param runtime - a Runtime object created with @ref xnn_create_runtime or @ref xnn_create_runtime_v2. 1070 /// @param num_external_values - the number of external inputs and outputs specified in this call. This number must 1071 /// match the number of external inputs and outputs in the runtime, i.e. all external 1072 /// inputs and outputs in the runtime must be specified in one call. 1073 /// @param external_values - array with location information for all external inputs and outputs in the runtime. 1074 enum xnn_status xnn_setup_runtime( 1075 xnn_runtime_t runtime, 1076 size_t num_external_values, 1077 const struct xnn_external_value* external_values); 1078 1079 /// Execute forward pass for all operators in the runtime. 1080 /// 1081 /// @param runtime - the Runtime object with the execution plan to invoke. 1082 enum xnn_status xnn_invoke_runtime( 1083 xnn_runtime_t runtime); 1084 1085 /// Destroy a Runtime object, as well as operators and memory associated with it. 1086 /// 1087 /// @param runtime - the Runtime object to destroy. 
1088 enum xnn_status xnn_delete_runtime( 1089 xnn_runtime_t runtime); 1090 1091 typedef struct xnn_operator* xnn_operator_t; 1092 1093 enum xnn_status xnn_run_operator( 1094 xnn_operator_t op, 1095 pthreadpool_t threadpool); 1096 1097 enum xnn_status xnn_delete_operator( 1098 xnn_operator_t op); 1099 1100 #ifndef XNN_NO_F32_OPERATORS 1101 1102 enum xnn_status xnn_create_abs_nc_f32( 1103 size_t channels, 1104 size_t input_stride, 1105 size_t output_stride, 1106 uint32_t flags, 1107 xnn_operator_t* abs_op_out); 1108 1109 enum xnn_status xnn_setup_abs_nc_f32( 1110 xnn_operator_t abs_op, 1111 size_t batch_size, 1112 const float* input, 1113 float* output, 1114 pthreadpool_t threadpool); 1115 1116 enum xnn_status xnn_create_add_nd_f32( 1117 float output_min, 1118 float output_max, 1119 uint32_t flags, 1120 xnn_operator_t* add_op_out); 1121 1122 enum xnn_status xnn_setup_add_nd_f32( 1123 xnn_operator_t add_op, 1124 size_t num_input1_dims, 1125 const size_t* input1_shape, 1126 size_t num_input2_dims, 1127 const size_t* input2_shape, 1128 const float* input1, 1129 const float* input2, 1130 float* output, 1131 pthreadpool_t threadpool); 1132 1133 enum xnn_status xnn_create_argmax_pooling2d_nhwc_f32( 1134 uint32_t input_padding_top, 1135 uint32_t input_padding_right, 1136 uint32_t input_padding_bottom, 1137 uint32_t input_padding_left, 1138 uint32_t pooling_height, 1139 uint32_t pooling_width, 1140 size_t channels, 1141 size_t input_pixel_stride, 1142 size_t output_pixel_stride, 1143 uint32_t flags, 1144 xnn_operator_t* argmax_pooling_op_out); 1145 1146 enum xnn_status xnn_setup_argmax_pooling2d_nhwc_f32( 1147 xnn_operator_t argmax_pooling_op, 1148 size_t batch_size, 1149 size_t input_height, 1150 size_t input_width, 1151 const float* input, 1152 float* output, 1153 uint32_t* index, 1154 pthreadpool_t threadpool); 1155 1156 enum xnn_status xnn_create_average_pooling2d_nhwc_f32( 1157 uint32_t input_padding_top, 1158 uint32_t input_padding_right, 1159 uint32_t 
input_padding_bottom, 1160 uint32_t input_padding_left, 1161 uint32_t pooling_height, 1162 uint32_t pooling_width, 1163 uint32_t stride_height, 1164 uint32_t stride_width, 1165 size_t channels, 1166 size_t input_pixel_stride, 1167 size_t output_pixel_stride, 1168 float output_min, 1169 float output_max, 1170 uint32_t flags, 1171 xnn_operator_t* average_pooling_op_out); 1172 1173 enum xnn_status xnn_setup_average_pooling2d_nhwc_f32( 1174 xnn_operator_t average_pooling_op, 1175 size_t batch_size, 1176 size_t input_height, 1177 size_t input_width, 1178 const float* input, 1179 float* output, 1180 pthreadpool_t threadpool); 1181 1182 enum xnn_status xnn_create_bankers_rounding_nc_f32( 1183 size_t channels, 1184 size_t input_stride, 1185 size_t output_stride, 1186 uint32_t flags, 1187 xnn_operator_t* rounding_op_out); 1188 1189 enum xnn_status xnn_setup_bankers_rounding_nc_f32( 1190 xnn_operator_t rounding_op, 1191 size_t batch_size, 1192 const float* input, 1193 float* output, 1194 pthreadpool_t threadpool); 1195 1196 enum xnn_status xnn_create_ceiling_nc_f32( 1197 size_t channels, 1198 size_t input_stride, 1199 size_t output_stride, 1200 uint32_t flags, 1201 xnn_operator_t* ceiling_op_out); 1202 1203 enum xnn_status xnn_setup_ceiling_nc_f32( 1204 xnn_operator_t ceiling_op, 1205 size_t batch_size, 1206 const float* input, 1207 float* output, 1208 pthreadpool_t threadpool); 1209 1210 enum xnn_status xnn_create_clamp_nc_f32( 1211 size_t channels, 1212 size_t input_stride, 1213 size_t output_stride, 1214 float output_min, 1215 float output_max, 1216 uint32_t flags, 1217 xnn_operator_t* clamp_op_out); 1218 1219 enum xnn_status xnn_setup_clamp_nc_f32( 1220 xnn_operator_t clamp_op, 1221 size_t batch_size, 1222 const float* input, 1223 float* output, 1224 pthreadpool_t threadpool); 1225 1226 enum xnn_status xnn_create_convolution2d_nhwc_f32( 1227 uint32_t input_padding_top, 1228 uint32_t input_padding_right, 1229 uint32_t input_padding_bottom, 1230 uint32_t 
input_padding_left, 1231 uint32_t kernel_height, 1232 uint32_t kernel_width, 1233 uint32_t subsampling_height, 1234 uint32_t subsampling_width, 1235 uint32_t dilation_height, 1236 uint32_t dilation_width, 1237 uint32_t groups, 1238 size_t group_input_channels, 1239 size_t group_output_channels, 1240 size_t input_channel_stride, 1241 size_t output_channel_stride, 1242 const float* kernel, 1243 const float* bias, 1244 float output_min, 1245 float output_max, 1246 uint32_t flags, 1247 xnn_operator_t* convolution_op_out); 1248 1249 enum xnn_status xnn_setup_convolution2d_nhwc_f32( 1250 xnn_operator_t convolution_op, 1251 size_t batch_size, 1252 size_t input_height, 1253 size_t input_width, 1254 const float* input, 1255 float* output, 1256 pthreadpool_t threadpool); 1257 1258 enum xnn_status xnn_create_deconvolution2d_nhwc_f32( 1259 uint32_t output_padding_top, 1260 uint32_t output_padding_right, 1261 uint32_t output_padding_bottom, 1262 uint32_t output_padding_left, 1263 uint32_t kernel_height, 1264 uint32_t kernel_width, 1265 uint32_t stride_height, 1266 uint32_t stride_width, 1267 uint32_t dilation_height, 1268 uint32_t dilation_width, 1269 uint32_t groups, 1270 size_t group_input_channels, 1271 size_t group_output_channels, 1272 size_t input_pixel_stride, 1273 size_t output_pixel_stride, 1274 const float* kernel, 1275 const float* bias, 1276 float output_min, 1277 float output_max, 1278 uint32_t flags, 1279 xnn_operator_t* deconvolution_op_out); 1280 1281 enum xnn_status xnn_setup_deconvolution2d_nhwc_f32( 1282 xnn_operator_t deconvolution_op, 1283 size_t batch_size, 1284 size_t input_height, 1285 size_t input_width, 1286 uint32_t adjustment_height, 1287 uint32_t adjustment_width, 1288 const float* input, 1289 float* output, 1290 pthreadpool_t threadpool); 1291 1292 enum xnn_status xnn_create_divide_nd_f32( 1293 float output_min, 1294 float output_max, 1295 uint32_t flags, 1296 xnn_operator_t* divide_op_out); 1297 1298 enum xnn_status xnn_setup_divide_nd_f32( 1299 
xnn_operator_t divide_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const float* input1,
  const float* input2,
  float* output,
  pthreadpool_t threadpool);

/// Create an ELU operator with F32 input/output in NC layout; alpha scales the negative part.
enum xnn_status xnn_create_elu_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  float alpha,
  uint32_t flags,
  xnn_operator_t* elu_op_out);

/// Set up a previously created F32 ELU operator.
enum xnn_status xnn_setup_elu_nc_f32(
  xnn_operator_t elu_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a Fully Connected operator with F32 input/output in NC layout.
enum xnn_status xnn_create_fully_connected_nc_f32(
  size_t input_channels,
  size_t output_channels,
  size_t input_stride,
  size_t output_stride,
  const float* kernel,
  const float* bias,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* fully_connected_op_out);

/// Set up a previously created F32 Fully Connected operator.
enum xnn_status xnn_setup_fully_connected_nc_f32(
  xnn_operator_t fully_connected_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a Floor operator with F32 input/output in NC layout.
enum xnn_status xnn_create_floor_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* floor_op_out);

/// Set up a previously created F32 Floor operator.
enum xnn_status xnn_setup_floor_nc_f32(
  xnn_operator_t floor_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a Global Average Pooling operator with F32 input/output in NWC layout.
enum xnn_status xnn_create_global_average_pooling_nwc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* global_average_pooling_op_out);

/// Set up a previously created F32 Global Average Pooling (NWC) operator.
enum xnn_status xnn_setup_global_average_pooling_nwc_f32(
  xnn_operator_t global_average_pooling_op,
  size_t batch_size,
  size_t width,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a HardSwish operator with F32 input/output in NC layout.
enum xnn_status xnn_create_hardswish_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* hardswish_op_out);

/// Set up a previously created F32 HardSwish operator.
enum xnn_status xnn_setup_hardswish_nc_f32(
  xnn_operator_t hardswish_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a Leaky ReLU operator with F32 input/output in NC layout; negative inputs are scaled by negative_slope.
enum xnn_status xnn_create_leaky_relu_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  float negative_slope,
  uint32_t flags,
  xnn_operator_t* leaky_relu_op_out);

/// Set up a previously created F32 Leaky ReLU operator.
enum xnn_status xnn_setup_leaky_relu_nc_f32(
  xnn_operator_t leaky_relu_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a 2D Max Pooling operator with F32 input/output in NHWC layout.
enum xnn_status xnn_create_max_pooling2d_nhwc_f32(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t pooling_height,
  uint32_t pooling_width,
  uint32_t stride_height,
  uint32_t stride_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* max_pooling_op_out);

/// Set up a previously created F32 2D Max Pooling (NHWC) operator.
enum xnn_status xnn_setup_max_pooling2d_nhwc_f32(
  xnn_operator_t max_pooling_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create an element-wise Maximum operator with F32 inputs/output and N-dimensional shapes.
enum xnn_status xnn_create_maximum_nd_f32(
  uint32_t flags,
  xnn_operator_t* maximum_op_out);

/// Set up a previously created F32 Maximum operator: bind the two input shapes/tensors and the output.
enum xnn_status xnn_setup_maximum_nd_f32(
  xnn_operator_t maximum_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const float* input1,
  const
float* input2,
  float* output,
  pthreadpool_t threadpool);

/// Create an element-wise Minimum operator with F32 inputs/output and N-dimensional shapes.
enum xnn_status xnn_create_minimum_nd_f32(
  uint32_t flags,
  xnn_operator_t* minimum_op_out);

/// Set up a previously created F32 Minimum operator.
enum xnn_status xnn_setup_minimum_nd_f32(
  xnn_operator_t minimum_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const float* input1,
  const float* input2,
  float* output,
  pthreadpool_t threadpool);

/// Create an element-wise Multiply operator with F32 inputs/output and N-dimensional shapes.
enum xnn_status xnn_create_multiply_nd_f32(
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* multiply_op_out);

/// Set up a previously created F32 Multiply operator.
enum xnn_status xnn_setup_multiply_nd_f32(
  xnn_operator_t multiply_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const float* input1,
  const float* input2,
  float* output,
  pthreadpool_t threadpool);

/// Create a Negate operator with F32 input/output in NC layout.
enum xnn_status xnn_create_negate_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* negate_op_out);

/// Set up a previously created F32 Negate operator.
enum xnn_status xnn_setup_negate_nc_f32(
  xnn_operator_t negate_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a PReLU operator with F32 input/output in NC layout; negative_slope is a per-channel array.
enum xnn_status xnn_create_prelu_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  const float* negative_slope,
  uint32_t flags,
  xnn_operator_t* prelu_op_out);

/// Set up a previously created F32 PReLU operator.
enum xnn_status xnn_setup_prelu_nc_f32(
  xnn_operator_t prelu_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a 2D Bilinear Resize operator with F32 input/output in NCHW layout.
enum xnn_status xnn_create_resize_bilinear2d_nchw_f32(
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  uint32_t flags,
  xnn_operator_t* resize_op_out);

/// Set up a previously created F32 2D Bilinear Resize (NCHW) operator.
enum xnn_status
xnn_setup_resize_bilinear2d_nchw_f32(
  xnn_operator_t resize_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  size_t output_height,
  size_t output_width,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a 2D Bilinear Resize operator with F32 input/output in NHWC layout.
enum xnn_status xnn_create_resize_bilinear2d_nhwc_f32(
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  uint32_t flags,
  xnn_operator_t* resize_op_out);

/// Set up a previously created F32 2D Bilinear Resize (NHWC) operator.
enum xnn_status xnn_setup_resize_bilinear2d_nhwc_f32(
  xnn_operator_t resize_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  size_t output_height,
  size_t output_width,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a Sigmoid operator with F32 input/output in NC layout.
enum xnn_status xnn_create_sigmoid_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* sigmoid_op_out);

/// Set up a previously created F32 Sigmoid operator.
enum xnn_status xnn_setup_sigmoid_nc_f32(
  xnn_operator_t sigmoid_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a SoftMax operator with F32 input/output in NC layout.
enum xnn_status xnn_create_softmax_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* softmax_op_out);

/// Set up a previously created F32 SoftMax operator.
enum xnn_status xnn_setup_softmax_nc_f32(
  xnn_operator_t softmax_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a Square operator with F32 input/output in NC layout.
enum xnn_status xnn_create_square_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* square_op_out);

/// Set up a previously created F32 Square operator.
enum xnn_status xnn_setup_square_nc_f32(
  xnn_operator_t square_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a Square Root operator with F32 input/output in NC layout.
enum xnn_status xnn_create_square_root_nc_f32(
  size_t channels,
  size_t
input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* sqrt_op_out);

/// Set up a previously created F32 Square Root operator.
enum xnn_status xnn_setup_square_root_nc_f32(
  xnn_operator_t sqrt_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a Squared Difference operator with F32 inputs/output and N-dimensional shapes.
enum xnn_status xnn_create_squared_difference_nd_f32(
  uint32_t flags,
  xnn_operator_t* squared_difference_op_out);

/// Set up a previously created F32 Squared Difference operator.
enum xnn_status xnn_setup_squared_difference_nd_f32(
  xnn_operator_t squared_difference_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const float* input1,
  const float* input2,
  float* output,
  pthreadpool_t threadpool);

/// Create a Subtract operator with F32 inputs/output and N-dimensional shapes.
enum xnn_status xnn_create_subtract_nd_f32(
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* subtract_op_out);

/// Set up a previously created F32 Subtract operator.
enum xnn_status xnn_setup_subtract_nd_f32(
  xnn_operator_t subtract_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const float* input1,
  const float* input2,
  float* output,
  pthreadpool_t threadpool);

/// Create a Truncation operator with F32 input/output in NC layout.
enum xnn_status xnn_create_truncation_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* truncation_op_out);

/// Set up a previously created F32 Truncation operator.
enum xnn_status xnn_setup_truncation_nc_f32(
  xnn_operator_t truncation_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

#ifndef XNN_NO_NCHW_OPERATORS

/// Create a 2D Convolution operator with F32 input/output in NCHW layout.
enum xnn_status xnn_create_convolution2d_nchw_f32(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t kernel_height,
  uint32_t kernel_width,
  uint32_t subsampling_height,
  uint32_t subsampling_width,
  uint32_t
dilation_height,
  uint32_t dilation_width,
  uint32_t groups,
  size_t group_input_channels,
  size_t group_output_channels,
  size_t input_channel_stride,
  size_t output_channel_stride,
  const float* kernel,
  const float* bias,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* convolution_op_out);

/// Set up a previously created F32 2D Convolution (NCHW) operator.
enum xnn_status xnn_setup_convolution2d_nchw_f32(
  xnn_operator_t convolution_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a Global Average Pooling operator with F32 input/output in NCW layout.
enum xnn_status xnn_create_global_average_pooling_ncw_f32(
  size_t channels,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* global_average_pooling_op_out);

/// Set up a previously created F32 Global Average Pooling (NCW) operator.
enum xnn_status xnn_setup_global_average_pooling_ncw_f32(
  xnn_operator_t global_average_pooling_op,
  size_t batch_size,
  size_t width,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

#endif  // XNN_NO_NCHW_OPERATORS

#endif  // XNN_NO_F32_OPERATORS

#ifndef XNN_NO_X32_OPERATORS

/// Create a Channel Shuffle operator for 32-bit-wide elements in NC layout.
enum xnn_status xnn_create_channel_shuffle_nc_x32(
  size_t groups,
  size_t group_channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* channel_shuffle_op_out);

/// Set up a previously created X32 Channel Shuffle operator.
enum xnn_status xnn_setup_channel_shuffle_nc_x32(
  xnn_operator_t channel_shuffle_op,
  size_t batch_size,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// Create an N-dimensional Constant Pad operator for 32-bit-wide elements.
enum xnn_status xnn_create_constant_pad_nd_x32(
  const void* padding_value,
  uint32_t flags,
  xnn_operator_t* constant_pad_op_out);

/// Set up a previously created X32 Constant Pad operator: bind shape, per-dimension pre/post padding, and tensors.
enum xnn_status xnn_setup_constant_pad_nd_x32(
  xnn_operator_t constant_pad_op,
  size_t num_dims,
  const size_t* input_shape,
  const size_t* pre_padding,
  const size_t* post_padding,
  const void*
input,
  void* output,
  pthreadpool_t threadpool);

/// Create a Copy operator for 32-bit-wide elements in NC layout.
enum xnn_status xnn_create_copy_nc_x32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* copy_op_out);

/// Set up a previously created X32 Copy operator.
enum xnn_status xnn_setup_copy_nc_x32(
  xnn_operator_t copy_op,
  size_t batch_size,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// Create a Depth-to-Space operator for 32-bit-wide elements in NHWC layout.
enum xnn_status xnn_create_depth_to_space_nhwc_x32(
  size_t output_channels,
  size_t input_channel_stride,
  size_t output_channel_stride,
  uint32_t block_size,
  uint32_t flags,
  xnn_operator_t* depth_to_space_op_out);

/// Set up a previously created X32 Depth-to-Space (NHWC) operator.
enum xnn_status xnn_setup_depth_to_space_nhwc_x32(
  xnn_operator_t depth_to_space_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// Create a Depth-to-Space operator for 32-bit-wide elements with NCHW input and NHWC output.
enum xnn_status xnn_create_depth_to_space_nchw2nhwc_x32(
  size_t output_channels,
  size_t input_channel_stride,
  size_t output_channel_stride,
  uint32_t block_size,
  uint32_t flags,
  xnn_operator_t* depth_to_space_op_out);

/// Set up a previously created X32 Depth-to-Space (NCHW-to-NHWC) operator.
enum xnn_status xnn_setup_depth_to_space_nchw2nhwc_x32(
  xnn_operator_t depth_to_space_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// Create a 2D Unpooling operator for 32-bit-wide elements in NHWC layout.
enum xnn_status xnn_create_unpooling2d_nhwc_x32(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t pooling_height,
  uint32_t pooling_width,
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  uint32_t flags,
  xnn_operator_t* unpooling_op_out);

/// Set up a previously created X32 2D Unpooling operator; index supplies the per-element pooling indices.
enum xnn_status xnn_setup_unpooling2d_nhwc_x32(
  xnn_operator_t unpooling_op,
  size_t batch_size,
  size_t input_height,
  size_t
input_width,
  const void* input,
  const uint32_t* index,
  void* output,
  pthreadpool_t threadpool);

#endif  // XNN_NO_X32_OPERATORS

#ifndef XNN_NO_F16_OPERATORS

/// Create an element-wise Add operator with F16 inputs/output and N-dimensional shapes.
enum xnn_status xnn_create_add_nd_f16(
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* add_op_out);

/// Set up a previously created F16 Add operator: bind the two input shapes/tensors and the output.
enum xnn_status xnn_setup_add_nd_f16(
  xnn_operator_t add_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const void* input1,
  const void* input2,
  void* output,
  pthreadpool_t threadpool);

/// Create a 2D Convolution operator with F16 input/output in NHWC layout.
enum xnn_status xnn_create_convolution2d_nhwc_f16(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t kernel_height,
  uint32_t kernel_width,
  uint32_t subsampling_height,
  uint32_t subsampling_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  uint32_t groups,
  size_t group_input_channels,
  size_t group_output_channels,
  size_t input_channel_stride,
  size_t output_channel_stride,
  const void* kernel,
  const void* bias,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* convolution_op_out);

/// Set up a previously created F16 2D Convolution (NHWC) operator.
enum xnn_status xnn_setup_convolution2d_nhwc_f16(
  xnn_operator_t convolution_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// Create a Global Average Pooling operator with F16 input/output in NWC layout.
enum xnn_status xnn_create_global_average_pooling_nwc_f16(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* global_average_pooling_op_out);

/// Set up a previously created F16 Global Average Pooling (NWC) operator.
enum xnn_status xnn_setup_global_average_pooling_nwc_f16(
  xnn_operator_t global_average_pooling_op,
  size_t batch_size,
  size_t width,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// Create a HardSwish operator with F16 input/output in NC layout.
enum xnn_status xnn_create_hardswish_nc_f16(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* hardswish_op_out);

/// Set up a previously created F16 HardSwish operator.
enum xnn_status xnn_setup_hardswish_nc_f16(
  xnn_operator_t hardswish_op,
  size_t batch_size,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// Create an element-wise Multiply operator with F16 inputs/output and N-dimensional shapes.
enum xnn_status xnn_create_multiply_nd_f16(
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* multiply_op_out);

/// Set up a previously created F16 Multiply operator.
enum xnn_status xnn_setup_multiply_nd_f16(
  xnn_operator_t multiply_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const void* input1,
  const void* input2,
  void* output,
  pthreadpool_t threadpool);

#endif  // XNN_NO_F16_OPERATORS

#ifndef XNN_NO_QS8_OPERATORS

/// Create an element-wise Add operator with signed 8-bit quantized inputs/output; each tensor has its own
/// zero point and scale.
enum xnn_status xnn_create_add_nd_qs8(
  int8_t input1_zero_point,
  float input1_scale,
  int8_t input2_zero_point,
  float input2_scale,
  int8_t output_zero_point,
  float output_scale,
  int8_t output_min,
  int8_t output_max,
  uint32_t flags,
  xnn_operator_t* add_op_out);

/// Set up a previously created QS8 Add operator.
enum xnn_status xnn_setup_add_nd_qs8(
  xnn_operator_t add_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const int8_t* input1,
  const int8_t* input2,
  int8_t* output,
  pthreadpool_t threadpool);

/// Create a 2D Convolution operator with signed 8-bit quantized input/output in NHWC layout.
enum xnn_status xnn_create_convolution2d_nhwc_qs8(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t kernel_height,
  uint32_t kernel_width,
  uint32_t subsampling_height,
  uint32_t subsampling_width,
  uint32_t
dilation_height,
  uint32_t dilation_width,
  uint32_t groups,
  size_t group_input_channels,
  size_t group_output_channels,
  size_t input_channel_stride,
  size_t output_channel_stride,
  int8_t input_zero_point,
  float input_scale,
  float kernel_scale,
  const int8_t* kernel,
  const int32_t* bias,
  int8_t output_zero_point,
  float output_scale,
  int8_t output_min,
  int8_t output_max,
  uint32_t flags,
  xnn_operator_t* convolution_op_out);

/// Set up a previously created QS8 2D Convolution (NHWC) operator.
enum xnn_status xnn_setup_convolution2d_nhwc_qs8(
  xnn_operator_t convolution_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const int8_t* input,
  int8_t* output,
  pthreadpool_t threadpool);

/// Create a Global Average Pooling operator with signed 8-bit quantized input/output in NWC layout.
enum xnn_status xnn_create_global_average_pooling_nwc_qs8(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  int8_t input_zero_point,
  float input_scale,
  int8_t output_zero_point,
  float output_scale,
  int8_t output_min,
  int8_t output_max,
  uint32_t flags,
  xnn_operator_t* global_average_pooling_op_out);

/// Set up a previously created QS8 Global Average Pooling (NWC) operator.
enum xnn_status xnn_setup_global_average_pooling_nwc_qs8(
  xnn_operator_t global_average_pooling_op,
  size_t batch_size,
  size_t width,
  const int8_t* input,
  int8_t* output,
  pthreadpool_t threadpool);

#endif  // XNN_NO_QS8_OPERATORS

#ifndef XNN_NO_QU8_OPERATORS

/// Create a 2D Average Pooling operator with unsigned 8-bit quantized input/output in NHWC layout.
enum xnn_status xnn_create_average_pooling2d_nhwc_qu8(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t pooling_height,
  uint32_t pooling_width,
  uint32_t stride_height,
  uint32_t stride_width,
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  uint8_t input_zero_point,
  float input_scale,
  uint8_t output_zero_point,
  float output_scale,
  uint8_t output_min,
uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* average_pooling_op_out);

/// Set up a previously created QU8 2D Average Pooling (NHWC) operator.
enum xnn_status xnn_setup_average_pooling2d_nhwc_qu8(
  xnn_operator_t average_pooling_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);

/// Create a 2D Convolution operator with unsigned 8-bit quantized input/output in NHWC layout.
enum xnn_status xnn_create_convolution2d_nhwc_qu8(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t kernel_height,
  uint32_t kernel_width,
  uint32_t subsampling_height,
  uint32_t subsampling_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  uint32_t groups,
  size_t group_input_channels,
  size_t group_output_channels,
  size_t input_channel_stride,
  size_t output_channel_stride,
  uint8_t input_zero_point,
  float input_scale,
  uint8_t kernel_zero_point,
  float kernel_scale,
  const uint8_t* kernel,
  const int32_t* bias,
  uint8_t output_zero_point,
  float output_scale,
  uint8_t output_min,
  uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* convolution_op_out);

/// Set up a previously created QU8 2D Convolution (NHWC) operator.
enum xnn_status xnn_setup_convolution2d_nhwc_qu8(
  xnn_operator_t convolution_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);

/// Create a 2D Deconvolution (transposed convolution) operator with unsigned 8-bit quantized input/output
/// in NHWC layout.
enum xnn_status xnn_create_deconvolution2d_nhwc_qu8(
  uint32_t output_padding_top,
  uint32_t output_padding_right,
  uint32_t output_padding_bottom,
  uint32_t output_padding_left,
  uint32_t kernel_height,
  uint32_t kernel_width,
  uint32_t stride_height,
  uint32_t stride_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  uint32_t groups,
  size_t group_input_channels,
  size_t group_output_channels,
  size_t input_pixel_stride,
  size_t
output_pixel_stride,
  uint8_t input_zero_point,
  float input_scale,
  uint8_t kernel_zero_point,
  float kernel_scale,
  const uint8_t* kernel,
  const int32_t* bias,
  uint8_t output_zero_point,
  float output_scale,
  uint8_t output_min,
  uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* deconvolution_op_out);

/// Set up a previously created QU8 2D Deconvolution (NHWC) operator.
enum xnn_status xnn_setup_deconvolution2d_nhwc_qu8(
  xnn_operator_t deconvolution_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  uint32_t adjustment_height,
  uint32_t adjustment_width,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);

/// Create a Fully Connected operator with unsigned 8-bit quantized input/output in NC layout.
enum xnn_status xnn_create_fully_connected_nc_qu8(
  size_t input_channels,
  size_t output_channels,
  size_t input_stride,
  size_t output_stride,
  uint8_t input_zero_point,
  float input_scale,
  uint8_t kernel_zero_point,
  float kernel_scale,
  const uint8_t* kernel,
  const int32_t* bias,
  uint8_t output_zero_point,
  float output_scale,
  uint8_t output_min,
  uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* fully_connected_op_out);

/// Set up a previously created QU8 Fully Connected operator.
enum xnn_status xnn_setup_fully_connected_nc_qu8(
  xnn_operator_t fully_connected_op,
  size_t batch_size,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);

/// Create a Global Average Pooling operator with unsigned 8-bit quantized input/output in NWC layout.
enum xnn_status xnn_create_global_average_pooling_nwc_qu8(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint8_t input_zero_point,
  float input_scale,
  uint8_t output_zero_point,
  float output_scale,
  uint8_t output_min,
  uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* global_average_pooling_op_out);

/// Set up a previously created QU8 Global Average Pooling (NWC) operator.
enum xnn_status xnn_setup_global_average_pooling_nwc_qu8(
  xnn_operator_t global_average_pooling_op,
  size_t batch_size,
  size_t width,
  const uint8_t* input,
  uint8_t* output,
pthreadpool_t threadpool);

/// Create a Leaky ReLU operator with unsigned 8-bit quantized input/output in NC layout.
enum xnn_status xnn_create_leaky_relu_nc_qu8(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  float negative_slope,
  uint8_t input_zero_point,
  float input_scale,
  uint8_t output_zero_point,
  float output_scale,
  uint8_t output_min,
  uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* leaky_relu_op_out);

/// Set up a previously created QU8 Leaky ReLU operator.
enum xnn_status xnn_setup_leaky_relu_nc_qu8(
  xnn_operator_t leaky_relu_op,
  size_t batch_size,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);

/// Create a Sigmoid operator with unsigned 8-bit quantized input/output in NC layout.
enum xnn_status xnn_create_sigmoid_nc_qu8(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint8_t input_zero_point,
  float input_scale,
  uint8_t output_zero_point,
  float output_scale,
  uint8_t output_min,
  uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* sigmoid_op_out);

/// Set up a previously created QU8 Sigmoid operator.
enum xnn_status xnn_setup_sigmoid_nc_qu8(
  xnn_operator_t sigmoid_op,
  size_t batch_size,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);

/// Create a SoftMax operator with unsigned 8-bit quantized input/output in NC layout.
/// Note: the output quantization parameters (zero point, scale) are fixed at creation time.
enum xnn_status xnn_create_softmax_nc_qu8(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  float input_scale,
  uint8_t output_zero_point,
  float output_scale,
  uint32_t flags,
  xnn_operator_t* softmax_op_out);

/// Set up a previously created QU8 SoftMax operator.
enum xnn_status xnn_setup_softmax_nc_qu8(
  xnn_operator_t softmax_op,
  size_t batch_size,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);

#endif  // XNN_NO_QU8_OPERATORS

#ifndef XNN_NO_U8_OPERATORS

/// Create a Clamp operator with unsigned 8-bit input/output in NC layout.
enum xnn_status xnn_create_clamp_nc_u8(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint8_t output_min,
  uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* clamp_op_out);

/// Set up a previously created U8 Clamp operator.
enum xnn_status xnn_setup_clamp_nc_u8(
  xnn_operator_t clamp_op,
size_t batch_size,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);

/// Create a 2D Max Pooling operator with unsigned 8-bit input/output in NHWC layout.
enum xnn_status xnn_create_max_pooling2d_nhwc_u8(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t pooling_height,
  uint32_t pooling_width,
  uint32_t stride_height,
  uint32_t stride_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  uint8_t output_min,
  uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* max_pooling_op_out);

/// Set up a previously created U8 2D Max Pooling (NHWC) operator.
enum xnn_status xnn_setup_max_pooling2d_nhwc_u8(
  xnn_operator_t max_pooling_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);

#endif  // XNN_NO_U8_OPERATORS

#ifndef XNN_NO_X8_OPERATORS

/// Create a Channel Shuffle operator for 8-bit-wide elements in NC layout.
enum xnn_status xnn_create_channel_shuffle_nc_x8(
  size_t groups,
  size_t group_channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* channel_shuffle_op_out);

/// Set up a previously created X8 Channel Shuffle operator.
enum xnn_status xnn_setup_channel_shuffle_nc_x8(
  xnn_operator_t channel_shuffle_op,
  size_t batch_size,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

#endif  // XNN_NO_X8_OPERATORS

#ifdef __cplusplus
}  // extern "C"
#endif