• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8 
9 #pragma once
10 
11 #include <stdbool.h>
12 #include <stddef.h>
13 #include <stdint.h>
14 
15 #include <pthreadpool.h>
16 
17 #ifdef __cplusplus
18 extern "C" {
19 #endif
20 
21 /// The number of bytes XNNPACK may read beyond array bounds.
22 /// The caller must allocate at least this many extra bytes after the tensor data passed to XNNPACK.
23 ///
24 /// Note: XNNPACK reads, but never writes beyond array bounds.
25 #define XNN_EXTRA_BYTES 16
26 
27 /// Maximum number of dimensions in tensor shape.
28 #define XNN_MAX_TENSOR_DIMS 6
29 
30 /// Allow sparse inference in a Runtime.
31 ///
32 /// Note: this flag forces XNNPACK to consider sparse inference, but does not guarantee it.
33 #define XNN_FLAG_SPARSE_INFERENCE 0x00000001
34 
35 /// Allow IEEE FP16 inference in a Runtime.
36 ///
37 /// Note: this flag forces XNNPACK to consider IEEE FP16 inference, but does not guarantee it.
38 #define XNN_FLAG_FP16_INFERENCE 0x00000002
39 
40 /// The convolution operator represents a depthwise convolution, and use HWGo layout for filters.
41 #define XNN_FLAG_DEPTHWISE_CONVOLUTION 0x00000001
42 
43 /// Assume transposed weights in a fully connected operator.
44 #define XNN_FLAG_TRANSPOSE_WEIGHTS 0x00000001
45 
46 /// The operator assumes NHWC layout for the input, regardless of the output layout.
47 #define XNN_FLAG_INPUT_NHWC 0x00000002
48 
49 /// Match "SAME" padding in TensorFlow. Exact padding values are computed dynamically depending on input size.
50 #define XNN_FLAG_TENSORFLOW_SAME_PADDING 0x00000004
51 
52 /// Implicitly flatten and reshape input of a Fully Connected operator into a 2D tensor.
53 #define XNN_FLAG_TENSORFLOW_RESHAPE_2D 0x00000004
54 
55 /// Match behaviour of TensorFlow 1.x.
56 #define XNN_FLAG_TENSORFLOW_LEGACY_MODE 0x00000004
57 
58 /// Static weights of the FP16 operator are in FP32 format.
59 #define XNN_FLAG_FP32_STATIC_WEIGHTS 0x00000008
60 
61 /// Align corners of input and output images in resize operations.
62 #define XNN_FLAG_ALIGN_CORNERS 0x00000008
63 
64 /// Yield worker threads of the thread pool to the system scheduler after the inference.
65 #define XNN_FLAG_YIELD_WORKERS 0x00000010
66 
67 /// Status code for any XNNPACK function call.
68 enum xnn_status {
69   /// The call succeeded, and all output arguments now contain valid data.
70   xnn_status_success = 0,
  /// XNNPACK was not successfully initialized (see xnn_initialize) at the time of the call.
71   xnn_status_uninitialized = 1,
  /// One of the arguments passed to the call has an invalid value.
72   xnn_status_invalid_parameter = 2,
  /// The object the call operates on is in a state that does not allow this operation.
73   xnn_status_invalid_state = 3,
  /// An argument value is valid in general, but not supported by this build/implementation.
74   xnn_status_unsupported_parameter = 4,
  /// The host processor does not support a hardware feature required by the call.
75   xnn_status_unsupported_hardware = 5,
  /// The call failed because a memory allocation failed.
76   xnn_status_out_of_memory = 6,
77 };
78 
/// User-provided memory management callbacks, passed to xnn_initialize to override the
/// system-provided allocation functions. All callbacks receive the user-specified @ref context
/// pointer as their first argument.
79 struct xnn_allocator {
80   /// User-specified pointer that will be passed as-is to all functions in this structure.
81   void* context;
82   /// Pointer to a function to be called for general memory allocation.
83   ///
84   /// @param context - The user-specified pointer from xnn_allocator structure.
85   /// @param size - The size of the memory block to allocate, in bytes.
86   ///
87   /// @returns Pointer to the allocated memory block of at least @ref size bytes.
88   ///          If allocation fails, the function must return NULL.
89   void* (*allocate)(void* context, size_t size);
90   /// Pointer to a function to be called for general memory re-allocation, i.e. to increase or shrink a previously
91   /// allocated memory block. The content of the old memory block is copied to the new memory block.
92   ///
93   /// @param context - The user-specified pointer from xnn_allocator structure.
94   /// @param pointer - Pointer to a memory block allocated by @ref allocate or @ref reallocate functions. Can be NULL.
95   ///                  If the pointer is NULL, the @ref reallocate call is equivalent to an @ref allocate call.
96   /// @param size - The new size of the memory block to allocate, in bytes.
97   ///
98   /// @returns Pointer to the newly allocated memory block of at least @ref size bytes with the content of the previous
99   ///          memory block.
100   ///          If allocation fails, the function must return NULL, but must not release the previous memory block.
101   void* (*reallocate)(void* context, void* pointer, size_t size);
102   /// Pointer to a function to be called for general memory de-allocation.
103   ///
104   /// @param context - The user-specified pointer from xnn_allocator structure.
105   /// @param pointer - Pointer to a memory block allocated by @ref allocate or @ref reallocate functions. Can be NULL.
106   ///                  If the pointer is NULL, the @ref deallocate call is a no-op.
107   void (*deallocate)(void* context, void* pointer);
108   /// Pointer to a function to be called for aligned memory allocation.
109   ///
110   /// @param context - The user-specified pointer from xnn_allocator structure.
111   /// @param alignment - The alignment of the memory block to allocate, in bytes. Alignment is always a power-of-2.
112   /// @param size - The size of the memory block to allocate, in bytes.
113   ///
114   /// @returns Pointer to the allocated memory block of at least @ref size bytes.
115   ///          If allocation fails, the function must return NULL.
116   void* (*aligned_allocate)(void* context, size_t alignment, size_t size);
117   /// Pointer to a function to be called for aligned memory de-allocation.
118   ///
119   /// @param context - The user-specified pointer from xnn_allocator structure.
120   /// @param pointer - Pointer to a memory block allocated by @ref aligned_allocate function. Can be NULL.
121   ///                  If the pointer is NULL, the @ref aligned_deallocate call is a no-op.
122   void (*aligned_deallocate)(void* context, void* pointer);
123 };
124 
125 /// Initialize XNNPACK library.
126 ///
127 /// XNNPACK must be successfully initialized before use. During initialization, XNNPACK populates internal structures
128 /// depending on the host processor. Initialization can be time-consuming.
129 ///
130 /// @param[in] allocator - structure with function pointers to be use for memory allocation and de-allocation.
131 ///                        If this argument is NULL, system-provided memory management functions (e.g. malloc/free)
132 ///                        will be used.
133 ///
134 /// @retval xnn_status_success - XNNPACK is successfully initialized and ready to use.
135 /// @retval xnn_status_out_of_memory - initialization failed due to out-of-memory condition.
136 /// @retval xnn_status_unsupported_hardware - initialization failed because the host processor does not satisfy the
137 ///                                           minimum hardware requirements for XNNPACK. E.g. this may happen on x86
138 ///                                           processors without SSE2 extension, or on 32-bit ARM processors without
139 ///                                           the NEON SIMD extension.
140 enum xnn_status xnn_initialize(const struct xnn_allocator* allocator);
141 
142 /// Deinitialize XNNPACK library.
143 ///
144 /// To avoid memory and resource leaks, users must call xnn_deinitialize once for each successful xnn_initialize call.
145 ///
146 /// @retval xnn_status_success - deinitialization call succeeded.
147 enum xnn_status xnn_deinitialize(void);
148 
149 /// Subgraph is an abstract representation of a neural network model.
150 /// Subgraph objects are used to define Values (tensors) and Nodes (operators) comprising the model.
151 typedef struct xnn_subgraph* xnn_subgraph_t;
152 
153 /// Create an empty Subgraph object.
154 ///
155 /// @param external_value_ids - number of Value IDs to reserve for communication with external graph representation.
156 ///                             The Subgraph object would avoid creating internal Value IDs in the
157 ///                             [0, external_value_ids-1] range.
158 /// @param flags - binary features of the subgraph. No supported flags are currently defined.
159 /// @param subgraph_out - pointer to the variable that will be initialized with a handle to the Subgraph object upon
160 ///                       successful return.
161 enum xnn_status xnn_create_subgraph(
162   uint32_t external_value_ids,
163   uint32_t flags,
164   xnn_subgraph_t* subgraph_out);
165 
166 /// Destroy a Subgraph object, as well as Values, and Nodes associated with the subgraph.
167 ///
168 /// @param subgraph - the Subgraph object to destroy.
169 enum xnn_status xnn_delete_subgraph(
170   xnn_subgraph_t subgraph);
171 
172 #define XNN_VALUE_FLAG_EXTERNAL_INPUT  0x00000001
173 #define XNN_VALUE_FLAG_EXTERNAL_OUTPUT 0x00000002
174 
175 #define XNN_INVALID_VALUE_ID UINT32_MAX
176 
177 /// Type of elements in a Value object.
178 enum xnn_datatype {
179   /// Invalid data type. Valid Values never have this datatype.
180   xnn_datatype_invalid = 0,
181   /// IEEE754 single-precision floating-point.
182   xnn_datatype_fp32 = 1,
183   /// IEEE754 half-precision floating-point.
184   xnn_datatype_fp16 = 2,
185   /// Quantized 8-bit signed integer with shared per-Value quantization parameters.
186   xnn_datatype_qint8 = 3,
187   /// Quantized 8-bit unsigned integer with shared per-Value quantization parameters.
188   xnn_datatype_quint8 = 4,
189   /// Quantized 32-bit signed integer with shared per-Value quantization parameters.
190   xnn_datatype_qint32 = 5,
  /// Note: per-channel quantized Values are created via xnn_define_channelwise_quantized_tensor_value.
191   /// Quantized 8-bit signed integer with shared per-channel quantization parameters.
192   xnn_datatype_qcint8 = 6,
193   /// Quantized 32-bit signed integer with shared per-channel quantization parameters.
194   xnn_datatype_qcint32 = 7,
195 };
196 
197 /// Define a tensor-type Value and add it to a Subgraph.
198 ///
199 /// @param subgraph - a Subgraph object that will own the created Value.
200 /// @param datatype - type of the tensor elements.
201 /// @param num_dims - number of dimensions in the shape.
202 /// @param dims - pointer to an array of @a num_dims shape dimensions. If num_dims is 0, this pointer can be NULL.
203 ///               XNNPACK does not keep any pointers to this array after the function returns.
204 /// @param data - pointer to static data used for tensor initialization. If the tensor is not statically initialized,
205 ///               this pointer must be NULL. If non-NULL, the life-time of the static data must exceed the life-time
206 ///               of the Subgraph object, and of any Runtime objects created from the Subgraph.
207 /// @param external_id - external ID for the Value. The ID must be within the range of reserved Value IDs specified on
208 ///                      the Subgraph creation. If the external ID is XNN_INVALID_VALUE_ID, an internal ID will be
209 ///                      created for the Value.
210 /// @param flags - binary features of the Value. Supported values are any combination of XNN_VALUE_FLAG_EXTERNAL_INPUT
211 ///                and XNN_VALUE_FLAG_EXTERNAL_OUTPUT.
212 /// @param id_out - pointer to the variable that will be initialized with the Value ID upon successful return. If a
213 ///                 valid @a external_id was provided, the variable will be initialized with the @a external_id value.
214 enum xnn_status xnn_define_tensor_value(
215   xnn_subgraph_t subgraph,
216   enum xnn_datatype datatype,
217   size_t num_dims,
218   const size_t* dims,
219   const void* data,
220   uint32_t external_id,
221   uint32_t flags,
222   uint32_t* id_out);
223 
224 /// Define a quantized tensor-type Value and add it to a Subgraph.
225 ///
226 /// @param subgraph - a Subgraph object that will own the created Value.
227 /// @param datatype - type of the tensor elements.
228 /// @param zero_point - offset from zero to subtract from the quantized elements in the Value.
229 /// @param scale - multiplication factor to convert quantized elements to real representation.
230 /// @param num_dims - number of dimensions in the shape.
231 /// @param dims - pointer to an array of @a num_dims shape dimensions. If num_dims is 0, this pointer can be NULL.
232 ///               XNNPACK does not keep any pointers to this array after the function returns.
233 /// @param data - pointer to static data used for tensor initialization. If the tensor is not statically initialized,
234 ///               this pointer must be NULL. If non-NULL, the life-time of the static data must exceed the life-time
235 ///               of the Subgraph object, and of any Runtime objects created from the Subgraph.
236 /// @param external_id - external ID for the Value. The ID must be within the range of reserved Value IDs specified on
237 ///                      the Subgraph creation. If the external ID is XNN_INVALID_VALUE_ID, an internal ID will be
238 ///                      created for the Value.
239 /// @param flags - binary features of the Value. Supported values are any combination of XNN_VALUE_FLAG_EXTERNAL_INPUT
240 ///                and XNN_VALUE_FLAG_EXTERNAL_OUTPUT.
241 /// @param id_out - pointer to the variable that will be initialized with the Value ID upon successful return. If a
242 ///                 valid @a external_id was provided, the variable will be initialized with the @a external_id value.
243 enum xnn_status xnn_define_quantized_tensor_value(
244   xnn_subgraph_t subgraph,
245   enum xnn_datatype datatype,
246   int32_t zero_point,
247   float scale,
248   size_t num_dims,
249   const size_t* dims,
250   const void* data,
251   uint32_t external_id,
252   uint32_t flags,
253   uint32_t* id_out);
254 
255 /// Define a channelwise quantized tensor-type Value and add it to a Subgraph.
256 ///
257 /// @param subgraph - a Subgraph object that will own the created Value.
258 /// @param datatype - type of the tensor elements.
259 /// @param scale - per-channel multiplication factors to convert quantized elements to real representation.
260 /// @param num_dims - number of dimensions in the shape.
261 /// @param channel_dim - index of the channel dimension in the tensor with per-channel quantization parameters.
262 ///                      Typically this is the first dimension (dimension #0) of the filter tensors in the Convolution,
263 ///                      Deconvolution, and Fully Connected operators and the last dimension of the filter tensors in
264 ///                      the Depthwise Convolution operators.
265 /// @param dims - pointer to an array of @a num_dims shape dimensions. If num_dims is 0, this pointer can be NULL.
266 ///               XNNPACK does not keep any pointers to this array after the function returns.
267 /// @param data - pointer to static data used for tensor initialization. If the tensor is not statically initialized,
268 ///               this pointer must be NULL. If non-NULL, the life-time of the static data must exceed the life-time
269 ///               of the Subgraph object, and of any Runtime objects created from the Subgraph.
270 /// @param external_id - external ID for the Value. The ID must be within the range of reserved Value IDs specified on
271 ///                      the Subgraph creation. If the external ID is XNN_INVALID_VALUE_ID, an internal ID will be
272 ///                      created for the Value.
273 /// @param flags - binary features of the Value. Supported values are any combination of XNN_VALUE_FLAG_EXTERNAL_INPUT
274 ///                and XNN_VALUE_FLAG_EXTERNAL_OUTPUT.
275 /// @param id_out - pointer to the variable that will be initialized with the Value ID upon successful return. If a
276 ///                 valid @a external_id was provided, the variable will be initialized with the @a external_id value.
277 enum xnn_status xnn_define_channelwise_quantized_tensor_value(
278   xnn_subgraph_t subgraph,
279   enum xnn_datatype datatype,
280   const float* scale,
281   size_t num_dims,
282   size_t channel_dim,
283   const size_t* dims,
284   const void* data,
285   uint32_t external_id,
286   uint32_t flags,
287   uint32_t* id_out);
288 
289 /// Define a Convert Node and add it to a Subgraph.
290 ///
291 /// @param subgraph - a Subgraph object that will own the created Node.
292 /// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
293 /// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
294 ///                    shape must match the shape of the input tensor.
295 /// @param flags - binary features of the Convert Node. No supported flags are currently defined.
296 enum xnn_status xnn_define_convert(
297   xnn_subgraph_t subgraph,
298   uint32_t input_id,
299   uint32_t output_id,
300   uint32_t flags);
301 
302 /// Define a 2D Convolution Node and add it to a Subgraph.
303 ///
304 /// @param subgraph - a Subgraph object that will own the created Node.
305 /// @param input_padding_top - implicit zero-padding above 2D input data. Must be 0 if XNN_FLAG_TENSORFLOW_SAME_PADDING
306 ///                            flag is specified.
307 /// @param input_padding_right - implicit zero-padding to the right of 2D input data. Must be 0 if
308 ///                              XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
309 /// @param input_padding_bottom - implicit zero-padding below 2D input data. Must be 0 if
310 ///                               XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
311 /// @param input_padding_left - implicit zero-padding to the left of 2D input data. Must be 0 if
312 ///                             XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
313 /// @param kernel_height - kernel (filter) height.
314 /// @param kernel_width - kernel (filter) width.
315 /// @param subsampling_height - height of subsampling region for convolution output (convolution height stride).
316 /// @param subsampling_width - width of subsampling region for convolution output (convolution width stride).
317 /// @param dilation_height - dilation of kernel elements along the height dimension.
318 /// @param dilation_width - dilation of kernel elements along the width dimension.
319 /// @param groups - number of convolution groups.
320 /// @param group_input_channels - number of input channels per group.
321 /// @param group_output_channels - number of output channels per group.
322 /// @param output_min - lower bound for clipping output values.
323 /// @param output_max - upper bound for clipping output values.
324 /// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
325 ///                   with [N, IH, IW, groups * group_input_channels] dimensions
326 /// @param filter_id - Value ID for the filter tensor. The filter tensor must be a 4D tensor defined in the @a subgraph
327 ///                    with [groups * group_output_channels, kernel_height, kernel_width, group_input_channels]
328 ///                    dimensions.
329 /// @param bias_id - Value ID for the bias tensor. The bias tensor must be a 1D tensor defined in the @a subgraph with
330 ///                  [groups * group_output_channels] dimensions.
331 /// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
332 ///                    with [N, OH, OW, groups * group_output_channels] dimensions.
333 /// @param flags - binary features of the 2D Convolution Node. The only currently supported value is
334 ///                XNN_FLAG_TENSORFLOW_SAME_PADDING.
335 enum xnn_status xnn_define_convolution_2d(
336   xnn_subgraph_t subgraph,
337   uint32_t input_padding_top,
338   uint32_t input_padding_right,
339   uint32_t input_padding_bottom,
340   uint32_t input_padding_left,
341   uint32_t kernel_height,
342   uint32_t kernel_width,
343   uint32_t subsampling_height,
344   uint32_t subsampling_width,
345   uint32_t dilation_height,
346   uint32_t dilation_width,
347   uint32_t groups,
348   size_t group_input_channels,
349   size_t group_output_channels,
350   float output_min,
351   float output_max,
352   uint32_t input_id,
353   uint32_t filter_id,
354   uint32_t bias_id,
355   uint32_t output_id,
356   uint32_t flags);
357 
358 /// Define a 2D Deconvolution (Transposed Convolution) Node and add it to a Subgraph.
359 ///
360 /// @param subgraph - a Subgraph object that will own the created Node.
361 /// @param padding_top - implicit padding above 2D output data.
362 /// @param padding_right - implicit padding to the right of 2D output data.
363 /// @param padding_bottom - implicit padding below 2D output data.
364 /// @param padding_left - implicit padding to the left of 2D output data.
365 /// @param adjustment_height - additional elements in the bottom of the 2D output data.
366 /// @param adjustment_width - additional elements to the right of the 2D output data.
367 /// @param kernel_height - kernel (filter) height.
368 /// @param kernel_width - kernel (filter) width.
369 /// @param upsampling_height - height of upsampling region for deconvolution input (deconvolution height stride).
370 /// @param upsampling_width - width of upsampling region for deconvolution input (deconvolution width stride).
371 /// @param dilation_height - dilation of kernel elements along the height dimension.
372 /// @param dilation_width - dilation of kernel elements along the width dimension.
373 /// @param groups - number of convolution groups.
374 /// @param group_input_channels - number of input channels per group.
375 /// @param group_output_channels - number of output channels per group.
376 /// @param output_min - lower bound for clipping output values.
377 /// @param output_max - upper bound for clipping output values.
378 /// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
379 ///                   with [N, IH, IW, groups * group_input_channels] dimensions
380 /// @param filter_id - Value ID for the filter tensor. The filter tensor must be a 4D tensor defined in the @a subgraph
381 ///                    with [groups * group_output_channels, kernel_height, kernel_width, group_input_channels]
382 ///                    dimensions.
383 /// @param bias_id - Value ID for the bias tensor, or XNN_INVALID_VALUE_ID for a 2D Deconvolution Node without a bias. If
384 ///                  present, the bias tensor must be a 1D tensor defined in the @a subgraph with
385 ///                  [groups * group_output_channels] dimensions.
386 /// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
387 ///                    with [N, OH, OW, groups * group_output_channels] dimensions.
388 /// @param flags - binary features of the 2D Deconvolution Node. No supported flags are currently defined.
389 enum xnn_status xnn_define_deconvolution_2d(
390   xnn_subgraph_t subgraph,
391   uint32_t padding_top,
392   uint32_t padding_right,
393   uint32_t padding_bottom,
394   uint32_t padding_left,
395   uint32_t adjustment_height,
396   uint32_t adjustment_width,
397   uint32_t kernel_height,
398   uint32_t kernel_width,
399   uint32_t upsampling_height,
400   uint32_t upsampling_width,
401   uint32_t dilation_height,
402   uint32_t dilation_width,
403   uint32_t groups,
404   size_t group_input_channels,
405   size_t group_output_channels,
406   float output_min,
407   float output_max,
408   uint32_t input_id,
409   uint32_t filter_id,
410   uint32_t bias_id,
411   uint32_t output_id,
412   uint32_t flags);
413 
414 /// Define a 2D Depthwise Convolution Node and add it to a Subgraph.
415 ///
416 /// @param subgraph - a Subgraph object that will own the created Node.
417 /// @param input_padding_top - implicit zero-padding above 2D input data. Must be 0 if XNN_FLAG_TENSORFLOW_SAME_PADDING
418 ///                            flag is specified.
419 /// @param input_padding_right - implicit zero-padding to the right of 2D input data. Must be 0 if
420 ///                              XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
421 /// @param input_padding_bottom - implicit zero-padding below 2D input data. Must be 0 if
422 ///                               XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
423 /// @param input_padding_left - implicit zero-padding to the left of 2D input data. Must be 0 if
424 ///                             XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
425 /// @param kernel_height - kernel (filter) height.
426 /// @param kernel_width - kernel (filter) width.
427 /// @param subsampling_height - height of subsampling region for convolution output (convolution height stride).
428 /// @param subsampling_width - width of subsampling region for convolution output (convolution width stride).
429 /// @param dilation_height - dilation of kernel elements along the height dimension.
430 /// @param dilation_width - dilation of kernel elements along the width dimension.
431 /// @param depth_multiplier - ratio of output channels to input channels.
432 /// @param input_channels - number of input channels.
433 /// @param output_min - lower bound for clipping output values.
434 /// @param output_max - upper bound for clipping output values.
435 /// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
436 ///                   with [N, IH, IW, input_channels] dimensions
437 /// @param filter_id - Value ID for the filter tensor. The filter tensor must be a 4D tensor defined in the @a subgraph
438 ///                    with [1, kernel_height, kernel_width, input_channels * depth_multiplier] dimensions.
439 /// @param bias_id - Value ID for the bias tensor, or XNN_INVALID_VALUE_ID for a 2D Depthwise Convolution Node without
440 ///                  a bias. If present, the bias tensor must be a 1D tensor defined in the @a subgraph with
441 ///                  [input_channels * depth_multiplier] dimensions.
442 /// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
443 ///                    with [N, OH, OW, input_channels * depth_multiplier] dimensions.
444 /// @param flags - binary features of the 2D Depthwise Convolution Node. The only currently supported value is
445 ///                XNN_FLAG_TENSORFLOW_SAME_PADDING.
446 enum xnn_status xnn_define_depthwise_convolution_2d(
447   xnn_subgraph_t subgraph,
448   uint32_t input_padding_top,
449   uint32_t input_padding_right,
450   uint32_t input_padding_bottom,
451   uint32_t input_padding_left,
452   uint32_t kernel_height,
453   uint32_t kernel_width,
454   uint32_t subsampling_height,
455   uint32_t subsampling_width,
456   uint32_t dilation_height,
457   uint32_t dilation_width,
458   uint32_t depth_multiplier,
459   size_t input_channels,
460   float output_min,
461   float output_max,
462   uint32_t input_id,
463   uint32_t filter_id,
464   uint32_t bias_id,
465   uint32_t output_id,
466   uint32_t flags);
467 
468 /// Define a Depth To Space Node and add it to a Subgraph.
469 ///
470 /// The Depth To Space Node rearranges data from depth into blocks of spatial data (a reverse transform to
471 /// Space To Depth). For a given input pixel, an output square of pixels with side @a block_size is formed from values
472 /// in the corresponding number of its channels. The output depth is therefore @a block_size x @a block_size times
473 /// smaller than that of the input.
474 ///
475 /// @param subgraph - a Subgraph object that will own the created Node.
476 /// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
477 ///                   with [N, IH, IW, OC * block_size * block_size] dimensions.
478 /// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
479 ///                    with [N, IH * block_size, IW * block_size, OC] dimensions.
480 /// @param block_size - the size of the spatial block.
481 /// @param flags - binary features of the Depth To Space Node. No supported flags are currently defined.
482 enum xnn_status xnn_define_depth_to_space(
483   xnn_subgraph_t subgraph,
484   uint32_t input_id,
485   uint32_t output_id,
486   uint32_t block_size,
487   uint32_t flags);
488 
489 /// Define a 2D Global Average Pooling Node and add it to a Subgraph.
490 ///
491 /// @param subgraph - a Subgraph object that will own the created Node.
492 /// @param output_min - lower bound for clipping output values.
493 /// @param output_max - upper bound for clipping output values.
494 /// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
495 ///                   with [N, H, W, C] dimensions
496 /// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
497 ///                    with [N, 1, 1, C] dimensions.
498 /// @param flags - binary features of the 2D Global Average Pooling Node. No supported flags are currently defined.
499 enum xnn_status xnn_define_global_average_pooling_2d(
500   xnn_subgraph_t subgraph,
501   float output_min,
502   float output_max,
503   uint32_t input_id,
504   uint32_t output_id,
505   uint32_t flags);
506 
507 /// Define a 2D Average Pooling Node and add it to a Subgraph.
508 ///
509 /// @param subgraph - a Subgraph object that will own the created Node.
510 /// @param input_padding_top - implicit zero-padding above 2D input data. Must be 0 if XNN_FLAG_TENSORFLOW_SAME_PADDING
511 ///                            flag is specified.
512 /// @param input_padding_right - implicit zero-padding to the right of 2D input data. Must be 0 if
513 ///                              XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
514 /// @param input_padding_bottom - implicit zero-padding below 2D input data. Must be 0 if
515 ///                               XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
516 /// @param input_padding_left - implicit zero-padding to the left of 2D input data. Must be 0 if
517 ///                             XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
518 /// @param pooling_height - pooling (kernel) height.
519 /// @param pooling_width - pooling (kernel) width.
520 /// @param stride_height - displacing of the pooling window in the vertical dimension of the input pixels corresponding
521 ///                        to vertically adjacent output pixels.
522 /// @param stride_width - displacing of the pooling window in the horizontal dimension of the input pixels corresponding
523 ///                        to horizontally adjacent output pixels.
524 /// @param output_min - lower bound for clipping output values.
525 /// @param output_max - upper bound for clipping output values.
526 /// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
527 ///                   with [N, IH, IW, channels] dimensions
528 /// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
529 ///                    with [N, OH, OW, channels] dimensions.
530 /// @param flags - binary features of the 2D Average Pooling Node. The only currently supported value is
531 ///                XNN_FLAG_TENSORFLOW_SAME_PADDING.
532 enum xnn_status xnn_define_average_pooling_2d(
533   xnn_subgraph_t subgraph,
534   uint32_t input_padding_top,
535   uint32_t input_padding_right,
536   uint32_t input_padding_bottom,
537   uint32_t input_padding_left,
538   uint32_t pooling_height,
539   uint32_t pooling_width,
540   uint32_t stride_height,
541   uint32_t stride_width,
542   float output_min,
543   float output_max,
544   uint32_t input_id,
545   uint32_t output_id,
546   uint32_t flags);
547 
548 /// Define a Fully Connected Node and add it to a Subgraph.
549 ///
550 /// @param subgraph - a Subgraph object that will own the created Node.
551 /// @param output_min - lower bound for clipping output values.
552 /// @param output_max - upper bound for clipping output values.
553 /// @param input_id - Value ID for the input tensor. The input tensor must be an N-dimensional tensor defined in the
554 ///                   @a subgraph. If XNN_FLAG_TENSORFLOW_RESHAPE_2D is not specified, the input tensor must be at least
555 ///                   1D and its last dimension must match the last dimension of the filter tensor. In particular, if
556 ///                   input is a 2D tensor, it must have [batch_size, input_channels] dimensions.
557 ///                   If XNN_FLAG_TENSORFLOW_RESHAPE_2D is specified, the number of elements in the input tensor must be
558 ///                   divisible by the input_channels. The tensor will be first flattened into a 1D tensor of
559 ///                   [num_input_elements] dimensions, then reshaped into a 2D tensor of
560 ///                   [num_input_elements / input_channels, input_channels] dimensions where num_input_elements is the
561 ///                   total number of elements in the input tensor.
/// @param filter_id - Value ID for the filter tensor. The filter tensor must be a 2D tensor defined in the @a subgraph.
563 ///                    If the XNN_FLAG_TRANSPOSE_WEIGHTS flag is not specified, the filter tensor must have
564 ///                    [output_channels, input_channels] dimensions. If the XNN_FLAG_TRANSPOSE_WEIGHTS flag is
565 ///                    specified, the filter tensor must have [input_channels, output_channels] dimensions.
566 /// @param bias_id - Value ID for the bias tensor, or XNN_INVALID_VALUE_ID for a Fully Connected Node without a bias.
567 ///                  If present, the bias tensor must be a 1D tensor defined in the @a subgraph with [output_channels]
568 ///                  dimensions.
569 /// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph.
570 ///                    If XNN_FLAG_TENSORFLOW_RESHAPE_2D is not specified, the output tensor must have the same
571 ///                    dimensionality as the input tensor, all its dimensions but the last one must match the
///                    corresponding dimensions of the input tensor, and the last dimension of the output tensor must
573 ///                    match the first dimension of the filter tensor. In particular, if input is a 2D tensor, output
574 ///                    must be a 2D tensor of [batch_size, output_channels] dimensions.
575 ///                    If XNN_FLAG_TENSORFLOW_RESHAPE_2D is specified, output must be a 2D tensor of
576 ///                    [num_input_elements / input_channels, output_channels] dimensions where num_input_elements is the
577 ///                    total number of elements in the input tensor.
578 /// @param flags - binary features of the Fully Connected Node. The only currently supported values are
579 ///                XNN_FLAG_TENSORFLOW_RESHAPE_2D and XNN_FLAG_TRANSPOSE_WEIGHTS.
580 enum xnn_status xnn_define_fully_connected(
581   xnn_subgraph_t subgraph,
582   float output_min,
583   float output_max,
584   uint32_t input_id,
585   uint32_t filter_id,
586   uint32_t bias_id,
587   uint32_t output_id,
588   uint32_t flags);
589 
590 /// Define a 2D Max Pooling Node and add it to a Subgraph.
591 ///
592 /// @param subgraph - a Subgraph object that will own the created Node.
593 /// @param input_padding_top - implicit zero-padding above 2D input data. Must be 0 if XNN_FLAG_TENSORFLOW_SAME_PADDING
594 ///                            flag is specified.
595 /// @param input_padding_right - implicit zero-padding to the right of 2D input data. Must be 0 if
596 ///                              XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
597 /// @param input_padding_bottom - implicit zero-padding below 2D input data. Must be 0 if
598 ///                               XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
599 /// @param input_padding_left - implicit zero-padding to the left of 2D input data. Must be 0 if
600 ///                             XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
601 /// @param pooling_height - pooling (kernel) height.
602 /// @param pooling_width - pooling (kernel) width.
603 /// @param stride_height - displacing of the pooling window in the vertical dimension of the input pixels corresponding
604 ///                        to vertically adjacent output pixels.
605 /// @param stride_width - displacing of the pooling window in the horizontal dimension of the input pixels corresponding
606 ///                        to horizontally adjacent output pixels.
607 /// @param dilation_height - dilation of pooling elements along the height dimension.
608 /// @param dilation_width - dilation of pooling elements along the width dimension.
609 /// @param output_min - lower bound for clipping output values.
610 /// @param output_max - upper bound for clipping output values.
/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
///                   with [N, IH, IW, channels] dimensions.
613 /// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
614 ///                    with [N, OH, OW, channels] dimensions.
/// @param flags - binary features of the 2D Max Pooling Node. The only currently supported value is
///                XNN_FLAG_TENSORFLOW_SAME_PADDING.
617 enum xnn_status xnn_define_max_pooling_2d(
618   xnn_subgraph_t subgraph,
619   uint32_t input_padding_top,
620   uint32_t input_padding_right,
621   uint32_t input_padding_bottom,
622   uint32_t input_padding_left,
623   uint32_t pooling_height,
624   uint32_t pooling_width,
625   uint32_t stride_height,
626   uint32_t stride_width,
627   uint32_t dilation_height,
628   uint32_t dilation_width,
629   float output_min,
630   float output_max,
631   uint32_t input_id,
632   uint32_t output_id,
633   uint32_t flags);
634 
635 /// Define a 2D ArgMax Pooling Node and add it to a Subgraph.
636 ///
637 /// @param subgraph - a Subgraph object that will own the created Node.
638 /// @param input_padding_top - implicit zero-padding above 2D input data.
639 /// @param input_padding_right - implicit zero-padding to the right of 2D input data.
640 /// @param input_padding_bottom - implicit zero-padding below 2D input data.
641 /// @param input_padding_left - implicit zero-padding to the left of 2D input data.
/// @param pooling_height - pooling (kernel) height. Vertical stride between pooling regions matches this value.
/// @param pooling_width - pooling (kernel) width. Horizontal stride between pooling regions matches this value.
/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
///                   with [N, IH, IW, channels] dimensions.
646 /// @param output_value_id - Value ID for the output tensor with the maximum values in the pools. The output tensor must
647 ///                          be a 4D tensor defined in the @a subgraph with [N, OH, OW, channels] dimensions.
648 /// @param output_index_id - Value ID for the output tensor with the indexes of the maximum values in the pools. The
649 ///                          output tensor must be a 4D tensor defined in the @a subgraph with [N, OH, OW, channels]
650 ///                          dimensions.
651 /// @param flags - binary features of the 2D ArgMax Pooling Node. No supported flags are currently defined.
652 enum xnn_status xnn_define_argmax_pooling_2d(
653   xnn_subgraph_t subgraph,
654   uint32_t input_padding_top,
655   uint32_t input_padding_right,
656   uint32_t input_padding_bottom,
657   uint32_t input_padding_left,
658   uint32_t pooling_height,
659   uint32_t pooling_width,
660   uint32_t input_id,
661   uint32_t output_value_id,
662   uint32_t output_index_id,
663   uint32_t flags);
664 
665 /// Define a 2D UnPooling Node and add it to a Subgraph.
666 ///
667 /// @param subgraph - a Subgraph object that will own the created Node.
668 /// @param padding_top - implicit padding above 2D output data.
669 /// @param padding_right - implicit padding to the right of 2D output data.
670 /// @param padding_bottom - implicit padding below 2D output data.
671 /// @param padding_left - implicit padding to the left of 2D output data.
672 /// @param pooling_height - height of the pooling window.
673 /// @param pooling_width - width of the pooling window.
674 /// @param input_value_id - Value ID for the input tensor with the max-pooling values to invert. The input value tensor
675 ///                         must be a 4D tensor defined in the @a subgraph with [N, IH, IW, channels] dimensions.
/// @param input_index_id - Value ID for the input tensor with the indices of the per-pool maximum values produced by
///                         a 2D ArgMax Pooling Node. The input tensor must be a 4D tensor defined in the @a subgraph
///                         with [N, IH, IW, channels] dimensions.
679 /// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
680 ///                    with [N, OH, OW, channels] dimensions.
681 /// @param flags - binary features of the 2D UnPooling Node. No supported flags are currently defined.
682 enum xnn_status xnn_define_unpooling_2d(
683   xnn_subgraph_t subgraph,
684   uint32_t padding_top,
685   uint32_t padding_right,
686   uint32_t padding_bottom,
687   uint32_t padding_left,
688   uint32_t pooling_height,
689   uint32_t pooling_width,
690   uint32_t input_value_id,
691   uint32_t input_index_id,
692   uint32_t output_id,
693   uint32_t flags);
694 
695 /// Define a 2-Input Add Node and add it to a Subgraph.
696 ///
697 /// The 2-Input Add Node computes elementwise addition of two tensor inputs with numpy broadcasting rules.
698 ///
699 /// @param subgraph - a Subgraph object that will own the created Node.
700 /// @param output_min - lower bound for clipping output values.
701 /// @param output_max - upper bound for clipping output values.
702 /// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in
703 ///                    the @a subgraph with each dimension either equal to the corresponding dimension of the second
704 ///                    input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
705 ///                    that dimension.
706 /// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in
707 ///                    the @a subgraph with each dimension either equal to the corresponding dimension of the first
708 ///                    input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
709 ///                    that dimension.
710 /// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined
711 ///                    in the @a subgraph with each dimension equal to the maximum between the corresponding dimension
712 ///                    of the two inputs.
713 /// @param flags - binary features of the Add Node. No supported flags are currently defined.
714 enum xnn_status xnn_define_add2(
715   xnn_subgraph_t subgraph,
716   float output_min,
717   float output_max,
718   uint32_t input1_id,
719   uint32_t input2_id,
720   uint32_t output_id,
721   uint32_t flags);
722 
723 /// Define a 2-Input Multiply Node and add it to a Subgraph.
724 ///
725 /// The 2-Input Multiply Node computes elementwise multiplication of two tensor inputs with numpy broadcasting rules.
726 ///
727 /// @param subgraph - a Subgraph object that will own the created Node.
728 /// @param output_min - lower bound for clipping output values.
729 /// @param output_max - upper bound for clipping output values.
730 /// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in
731 ///                    the @a subgraph with each dimension either equal to the corresponding dimension of the second
732 ///                    input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
733 ///                    that dimension.
734 /// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in
735 ///                    the @a subgraph with each dimension either equal to the corresponding dimension of the first
736 ///                    input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
737 ///                    that dimension.
738 /// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined
739 ///                    in the @a subgraph with each dimension equal to the maximum between the corresponding dimension
740 ///                    of the two inputs.
741 /// @param flags - binary features of the Multiply Node. No supported flags are currently defined.
742 enum xnn_status xnn_define_multiply2(
743   xnn_subgraph_t subgraph,
744   float output_min,
745   float output_max,
746   uint32_t input1_id,
747   uint32_t input2_id,
748   uint32_t output_id,
749   uint32_t flags);
750 
751 /// Define a Subtract Node and add it to a Subgraph.
752 ///
753 /// The Subtract Node computes elementwise subtraction of two tensor inputs with numpy broadcasting rules.
754 ///
755 /// @param subgraph - a Subgraph object that will own the created Node.
756 /// @param output_min - lower bound for clipping output values.
757 /// @param output_max - upper bound for clipping output values.
758 /// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in
759 ///                    the @a subgraph with each dimension either equal to the corresponding dimension of the second
760 ///                    input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
761 ///                    that dimension.
762 /// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in
763 ///                    the @a subgraph with each dimension either equal to the corresponding dimension of the first
764 ///                    input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
765 ///                    that dimension.
766 /// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined
767 ///                    in the @a subgraph with each dimension equal to the maximum between the corresponding dimension
768 ///                    of the two inputs.
769 /// @param flags - binary features of the Subtract Node. No supported flags are currently defined.
770 enum xnn_status xnn_define_subtract(
771   xnn_subgraph_t subgraph,
772   float output_min,
773   float output_max,
774   uint32_t input1_id,
775   uint32_t input2_id,
776   uint32_t output_id,
777   uint32_t flags);
778 
779 /// Define a Divide Node and add it to a Subgraph.
780 ///
781 /// The Divide Node computes elementwise division of two tensor inputs with numpy broadcasting rules.
782 ///
783 /// @param subgraph - a Subgraph object that will own the created Node.
784 /// @param output_min - lower bound for clipping output values.
785 /// @param output_max - upper bound for clipping output values.
786 /// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in
787 ///                    the @a subgraph with each dimension either equal to the corresponding dimension of the second
788 ///                    input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
789 ///                    that dimension.
790 /// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in
791 ///                    the @a subgraph with each dimension either equal to the corresponding dimension of the first
792 ///                    input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
793 ///                    that dimension.
794 /// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined
795 ///                    in the @a subgraph with each dimension equal to the maximum between the corresponding dimension
796 ///                    of the two inputs.
797 /// @param flags - binary features of the Divide Node. No supported flags are currently defined.
798 enum xnn_status xnn_define_divide(
799   xnn_subgraph_t subgraph,
800   float output_min,
801   float output_max,
802   uint32_t input1_id,
803   uint32_t input2_id,
804   uint32_t output_id,
805   uint32_t flags);
806 
807 /// Define a 2-Input Maximum Node and add it to a Subgraph.
808 ///
809 /// The 2-Input Maximum Node computes elementwise maximum of two tensor inputs with numpy broadcasting rules.
810 ///
811 /// @param subgraph - a Subgraph object that will own the created Node.
812 /// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in
813 ///                    the @a subgraph with each dimension either equal to the corresponding dimension of the second
814 ///                    input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
815 ///                    that dimension.
816 /// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in
817 ///                    the @a subgraph with each dimension either equal to the corresponding dimension of the first
818 ///                    input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
819 ///                    that dimension.
820 /// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined
821 ///                    in the @a subgraph with each dimension equal to the maximum between the corresponding dimension
822 ///                    of the two inputs.
823 /// @param flags - binary features of the Maximum Node. No supported flags are currently defined.
824 enum xnn_status xnn_define_maximum2(
825   xnn_subgraph_t subgraph,
826   uint32_t input1_id,
827   uint32_t input2_id,
828   uint32_t output_id,
829   uint32_t flags);
830 
831 /// Define a 2-Input Minimum Node and add it to a Subgraph.
832 ///
833 /// The 2-Input Minimum Node computes elementwise minimum of two tensor inputs with numpy broadcasting rules.
834 ///
835 /// @param subgraph - a Subgraph object that will own the created Node.
836 /// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in
837 ///                    the @a subgraph with each dimension either equal to the corresponding dimension of the second
838 ///                    input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
839 ///                    that dimension.
840 /// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in
841 ///                    the @a subgraph with each dimension either equal to the corresponding dimension of the first
842 ///                    input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
843 ///                    that dimension.
844 /// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined
845 ///                    in the @a subgraph with each dimension equal to the maximum between the corresponding dimension
846 ///                    of the two inputs.
847 /// @param flags - binary features of the Minimum Node. No supported flags are currently defined.
848 enum xnn_status xnn_define_minimum2(
849   xnn_subgraph_t subgraph,
850   uint32_t input1_id,
851   uint32_t input2_id,
852   uint32_t output_id,
853   uint32_t flags);
854 
855 /// Define a Squared Difference Node and add it to a Subgraph.
856 ///
857 /// The Squared Difference Node computes elementwise squared difference of two tensor inputs with numpy broadcasting
858 /// rules.
859 ///
860 /// @param subgraph - a Subgraph object that will own the created Node.
861 /// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in
862 ///                    the @a subgraph with each dimension either equal to the corresponding dimension of the second
863 ///                    input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
864 ///                    that dimension.
865 /// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in
866 ///                    the @a subgraph with each dimension either equal to the corresponding dimension of the first
867 ///                    input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
868 ///                    that dimension.
869 /// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined
870 ///                    in the @a subgraph with each dimension equal to the maximum between the corresponding dimension
871 ///                    of the two inputs.
872 /// @param flags - binary features of the Squared Difference Node. No supported flags are currently defined.
873 enum xnn_status xnn_define_squared_difference(
874   xnn_subgraph_t subgraph,
875   uint32_t input1_id,
876   uint32_t input2_id,
877   uint32_t output_id,
878   uint32_t flags);
879 
880 /// Define a Constant Pad Node with static padding specification and add it to a Subgraph.
881 ///
882 /// @param subgraph - a Subgraph object that will own the created Node.
/// @param pre_paddings - number of padding elements to insert before input elements for every dimension. This array
///                       must have as many elements as the number of dimensions in the input tensor.
/// @param post_paddings - number of padding elements to insert after input elements for every dimension. This array
///                        must have as many elements as the number of dimensions in the input tensor.
887 /// @param padding_value - constant value used to initialize padding elements.
888 /// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
889 /// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
890 ///                    shape must match the shape of the input tensor with padding.
891 /// @param flags - binary features of the Constant Pad Node. No supported flags are currently defined.
892 enum xnn_status xnn_define_static_constant_pad(
893   xnn_subgraph_t subgraph,
894   const size_t* pre_paddings,
895   const size_t* post_paddings,
896   float padding_value,
897   uint32_t input_id,
898   uint32_t output_id,
899   uint32_t flags);
900 
901 /// Define a Reshape Node with static shape specification and add it to a Subgraph.
902 ///
903 /// @param subgraph - a Subgraph object that will own the created Node.
904 /// @param num_dims - number of shape dimensions in the output tensor.
905 /// @param new_shape - shape dimensions of the output tensor.
906 /// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
///                    shape must match the new shape specified by @a num_dims and @a new_shape.
909 /// @param flags - binary features of the Reshape Node. No supported flags are currently defined.
910 enum xnn_status xnn_define_static_reshape(
911   xnn_subgraph_t subgraph,
912   size_t num_dims,
913   const size_t* new_shape,
914   uint32_t input_id,
915   uint32_t output_id,
916   uint32_t flags);
917 
918 /// Define a 2D Resize Bilinear Node with static output height & width specification and add it to a Subgraph.
919 ///
920 /// @param subgraph - a Subgraph object that will own the created Node.
921 /// @param new_height - height dimension of the output tensor.
922 /// @param new_width - width dimension of the output tensor.
923 /// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
924 ///                   with [N, H, W, C] dimensions.
925 /// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
926 ///                    with [N, new_height, new_width, C] dimensions.
927 /// @param flags - binary features of the 2D Resize Bilinear Node. The only currently supported values are
928 ///                XNN_FLAG_TENSORFLOW_LEGACY_MODE and XNN_FLAG_ALIGN_CORNERS, which are mutually exclusive.
929 enum xnn_status xnn_define_static_resize_bilinear_2d(
930   xnn_subgraph_t subgraph,
931   size_t new_height,
932   size_t new_width,
933   uint32_t input_id,
934   uint32_t output_id,
935   uint32_t flags);
936 
937 /// Define a PReLU (Parametric ReLU) Node and add it to a Subgraph.
938 ///
939 /// @param subgraph - a Subgraph object that will own the created Node.
940 /// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
941 ///                   with [N, H, W, channels] dimensions.
/// @param slope_id - Value ID for the slope tensor. The slope tensor must be a 1D tensor defined in the @a subgraph with
943 ///                   [channels] dimensions.
944 /// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
945 ///                    with [N, H, W, channels] dimensions.
946 /// @param flags - binary features of the PReLU Node. No supported flags are currently defined.
947 enum xnn_status xnn_define_prelu(
948   xnn_subgraph_t subgraph,
949   uint32_t input_id,
950   uint32_t slope_id,
951   uint32_t output_id,
952   uint32_t flags);
953 
/// Define an Abs Node and add it to a Subgraph.
955 ///
956 /// @param subgraph - a Subgraph object that will own the created Node.
957 /// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
958 /// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
959 ///                    shape must match the shape of the input tensor.
960 /// @param flags - binary features of the Abs Node. No supported flags are currently defined.
961 enum xnn_status xnn_define_abs(
962   xnn_subgraph_t subgraph,
963   uint32_t input_id,
964   uint32_t output_id,
965   uint32_t flags);
966 
967 /// Define a Bankers' Rounding Node and add it to a Subgraph.
968 ///
969 /// @param subgraph - a Subgraph object that will own the created Node.
970 /// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
971 /// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
972 ///                    shape must match the shape of the input tensor.
973 /// @param flags - binary features of the Bankers' Rounding Node. No supported flags are currently defined.
974 enum xnn_status xnn_define_bankers_rounding(
975   xnn_subgraph_t subgraph,
976   uint32_t input_id,
977   uint32_t output_id,
978   uint32_t flags);
979 
980 /// Define a Ceiling Node and add it to a Subgraph.
981 ///
982 /// @param subgraph - a Subgraph object that will own the created Node.
983 /// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
984 /// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
985 ///                    shape must match the shape of the input tensor.
986 /// @param flags - binary features of the Ceiling Node. No supported flags are currently defined.
987 enum xnn_status xnn_define_ceiling(
988   xnn_subgraph_t subgraph,
989   uint32_t input_id,
990   uint32_t output_id,
991   uint32_t flags);
992 
993 /// Define a Clamp Node and add it to a Subgraph.
994 ///
995 /// @param subgraph - a Subgraph object that will own the created Node.
996 /// @param output_min - lower bound for clipping output values.
997 /// @param output_max - upper bound for clipping output values.
998 /// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
999 /// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
1000 ///                    shape must match the shape of the input tensor.
1001 /// @param flags - binary features of the Clamp Node. No supported flags are currently defined.
1002 enum xnn_status xnn_define_clamp(
1003   xnn_subgraph_t subgraph,
1004   float output_min,
1005   float output_max,
1006   uint32_t input_id,
1007   uint32_t output_id,
1008   uint32_t flags);
1009 
1010 /// Define an ELU (Exponential Linear Unit) Node and add it to a Subgraph.
1011 ///
1012 /// @param subgraph - a Subgraph object that will own the created Node.
1013 /// @param alpha - scale factor for negative output elements.
1014 /// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
1015 /// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
1016 ///                    shape must match the shape of the input tensor.
1017 /// @param flags - binary features of the ELU Node. No supported flags are currently defined.
1018 enum xnn_status xnn_define_elu(
1019   xnn_subgraph_t subgraph,
1020   float alpha,
1021   uint32_t input_id,
1022   uint32_t output_id,
1023   uint32_t flags);
1024 
/// Define a Floor Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
///                    shape must match the shape of the input tensor.
/// @param flags - binary features of the Floor Node. No supported flags are currently defined.
enum xnn_status xnn_define_floor(
  xnn_subgraph_t subgraph,
  uint32_t input_id,
  uint32_t output_id,
  uint32_t flags);

/// Define a HardSwish Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
///                    shape must match the shape of the input tensor.
/// @param flags - binary features of the HardSwish Node. No supported flags are currently defined.
enum xnn_status xnn_define_hardswish(
  xnn_subgraph_t subgraph,
  uint32_t input_id,
  uint32_t output_id,
  uint32_t flags);

/// Define a Leaky ReLU Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param negative_slope - scale factor for negative input elements.
/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
///                    shape must match the shape of the input tensor.
/// @param flags - binary features of the Leaky ReLU Node. No supported flags are currently defined.
enum xnn_status xnn_define_leaky_relu(
  xnn_subgraph_t subgraph,
  float negative_slope,
  uint32_t input_id,
  uint32_t output_id,
  uint32_t flags);

/// Define a Negate Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
///                    shape must match the shape of the input tensor.
/// @param flags - binary features of the Negate Node. No supported flags are currently defined.
enum xnn_status xnn_define_negate(
  xnn_subgraph_t subgraph,
  uint32_t input_id,
  uint32_t output_id,
  uint32_t flags);

/// Define a Sigmoid Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
///                    shape must match the shape of the input tensor.
/// @param flags - binary features of the Sigmoid Node. No supported flags are currently defined.
enum xnn_status xnn_define_sigmoid(
  xnn_subgraph_t subgraph,
  uint32_t input_id,
  uint32_t output_id,
  uint32_t flags);

/// Define a SoftMax Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph, and have at
///                   least one dimension.
/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
///                    shape must match the shape of the input tensor.
/// @param flags - binary features of the SoftMax Node. No supported flags are currently defined.
enum xnn_status xnn_define_softmax(
  xnn_subgraph_t subgraph,
  uint32_t input_id,
  uint32_t output_id,
  uint32_t flags);

/// Define a Square Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
///                    shape must match the shape of the input tensor.
/// @param flags - binary features of the Square Node. No supported flags are currently defined.
enum xnn_status xnn_define_square(
  xnn_subgraph_t subgraph,
  uint32_t input_id,
  uint32_t output_id,
  uint32_t flags);

/// Define a Square Root Node and add it to a Subgraph.
///
/// NOTE(review): behavior for negative input elements is not specified here — confirm against the implementation.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
///                    shape must match the shape of the input tensor.
/// @param flags - binary features of the Square Root Node. No supported flags are currently defined.
enum xnn_status xnn_define_square_root(
  xnn_subgraph_t subgraph,
  uint32_t input_id,
  uint32_t output_id,
  uint32_t flags);
1131 
/// Runtime is a combination of an execution plan for subgraph Nodes and a memory manager for subgraph Values.
typedef struct xnn_runtime* xnn_runtime_t;

/// Create a Runtime object from a subgraph.
///
/// @param subgraph - a Subgraph object with all Values and Nodes that would be handled by the runtime. No Values or
///                   Nodes can be added to the runtime once it is constructed.
/// @param threadpool - the thread pool to be used for parallelization of computations in the runtime. If the thread
///                     pool is NULL, the computation would run on the caller thread without parallelization.
/// @param flags - binary features of the runtime. The only currently supported values are XNN_FLAG_SPARSE_INFERENCE,
///                XNN_FLAG_FP16_INFERENCE, and XNN_FLAG_YIELD_WORKERS. If XNN_FLAG_YIELD_WORKERS is specified, worker
///                threads would be yielded to the system scheduler after processing the last operator in the Runtime.
/// @param runtime_out - pointer to the variable that will be initialized with a handle to the Runtime object upon
///                      successful return. Once constructed, the Runtime object is independent of the Subgraph object
///                      used to create it.
enum xnn_status xnn_create_runtime_v2(
  xnn_subgraph_t subgraph,
  pthreadpool_t threadpool,
  uint32_t flags,
  xnn_runtime_t* runtime_out);
1152 
/// Create a Runtime object from a subgraph.
///
/// NOTE(review): presumably equivalent to xnn_create_runtime_v2 with a NULL thread pool and no flags — confirm
/// against the implementation.
///
/// @param subgraph - a Subgraph object with all Values and Nodes that would be handled by the runtime.
/// @param runtime_out - pointer to the variable that will be initialized with a handle to the Runtime object upon
///                      successful return.
enum xnn_status xnn_create_runtime(
  xnn_subgraph_t subgraph,
  xnn_runtime_t* runtime_out);
1156 
/// Location information for one external input or output Value of a Runtime
/// (see xnn_setup_runtime, which consumes an array of these).
struct xnn_external_value {
  uint32_t id;  // Value ID of the external input or output in the subgraph.
  void* data;   // Pointer to the tensor data for this Value.
};
1161 
/// Setup data pointers for external inputs and outputs in a Runtime object.
///
/// @param runtime - a Runtime object created with @ref xnn_create_runtime or @ref xnn_create_runtime_v2.
/// @param num_external_values - the number of external inputs and outputs specified in this call. This number must
///                              match the number of external inputs and outputs in the runtime, i.e. all external
///                              inputs and outputs in the runtime must be specified in one call.
/// @param external_values - array with location information for all external inputs and outputs in the runtime.
enum xnn_status xnn_setup_runtime(
  xnn_runtime_t runtime,
  size_t num_external_values,
  const struct xnn_external_value* external_values);

/// Execute forward pass for all operators in the runtime.
///
/// @param runtime - the Runtime object with the execution plan to invoke.
enum xnn_status xnn_invoke_runtime(
  xnn_runtime_t runtime);

/// Destroy a Runtime object, as well as operators and memory associated with it.
///
/// @param runtime - the Runtime object to destroy.
enum xnn_status xnn_delete_runtime(
  xnn_runtime_t runtime);
1185 
/// Handle to an opaque XNNPACK operator object. Operators are created with the type-specific
/// xnn_create_* functions below, configured with the matching xnn_setup_* functions, executed
/// with xnn_run_operator, and released with xnn_delete_operator.
typedef struct xnn_operator* xnn_operator_t;

/// Run a previously set up operator.
///
/// @param op - the operator to run. It must have been configured with the matching xnn_setup_* call.
/// @param threadpool - thread pool for parallelization of the computation; presumably NULL runs the
///                     computation on the calling thread (same convention as xnn_create_runtime_v2) — TODO confirm.
enum xnn_status xnn_run_operator(
  xnn_operator_t op,
  pthreadpool_t threadpool);

/// Destroy an operator object and release the memory associated with it.
///
/// @param op - the operator to destroy.
enum xnn_status xnn_delete_operator(
  xnn_operator_t op);
1194 
1195 #ifndef XNN_NO_F32_OPERATORS
1196 
/// Create an Abs operator for F32 data in NC layout.
enum xnn_status xnn_create_abs_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* abs_op_out);

/// Set up an Abs operator: bind input and output buffers for a batch of elements.
enum xnn_status xnn_setup_abs_nc_f32(
  xnn_operator_t abs_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create an N-dimensional Add operator for F32 data with output range [output_min, output_max].
enum xnn_status xnn_create_add_nd_f32(
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* add_op_out);

/// Set up an N-dimensional Add operator: bind the two (possibly differently shaped) inputs and the output.
enum xnn_status xnn_setup_add_nd_f32(
  xnn_operator_t add_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const float* input1,
  const float* input2,
  float* output,
  pthreadpool_t threadpool);
1227 
/// Create a 2D ArgMax Pooling operator for F32 data in NHWC layout.
enum xnn_status xnn_create_argmax_pooling2d_nhwc_f32(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t pooling_height,
  uint32_t pooling_width,
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  uint32_t flags,
  xnn_operator_t* argmax_pooling_op_out);

/// Set up a 2D ArgMax Pooling operator: bind input, output, and the index buffer receiving argmax positions.
enum xnn_status xnn_setup_argmax_pooling2d_nhwc_f32(
  xnn_operator_t argmax_pooling_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const float* input,
  float* output,
  uint32_t* index,
  pthreadpool_t threadpool);

/// Create a 2D Average Pooling operator for F32 data in NHWC layout with output range [output_min, output_max].
enum xnn_status xnn_create_average_pooling2d_nhwc_f32(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t pooling_height,
  uint32_t pooling_width,
  uint32_t stride_height,
  uint32_t stride_width,
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* average_pooling_op_out);

/// Set up a 2D Average Pooling operator: bind input and output buffers for a batch of images.
enum xnn_status xnn_setup_average_pooling2d_nhwc_f32(
  xnn_operator_t average_pooling_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const float* input,
  float* output,
  pthreadpool_t threadpool);
1276 
/// Create a Bankers' Rounding (round-half-to-even) operator for F32 data in NC layout.
enum xnn_status xnn_create_bankers_rounding_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* rounding_op_out);

/// Set up a Bankers' Rounding operator: bind input and output buffers for a batch of elements.
enum xnn_status xnn_setup_bankers_rounding_nc_f32(
  xnn_operator_t rounding_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a Ceiling operator for F32 data in NC layout.
enum xnn_status xnn_create_ceiling_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* ceiling_op_out);

/// Set up a Ceiling operator: bind input and output buffers for a batch of elements.
enum xnn_status xnn_setup_ceiling_nc_f32(
  xnn_operator_t ceiling_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a Clamp operator for F32 data in NC layout, limiting outputs to [output_min, output_max].
enum xnn_status xnn_create_clamp_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* clamp_op_out);

/// Set up a Clamp operator: bind input and output buffers for a batch of elements.
enum xnn_status xnn_setup_clamp_nc_f32(
  xnn_operator_t clamp_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);
1320 
/// Create a 2D Convolution operator for F32 data in NHWC layout.
/// Pass XNN_FLAG_DEPTHWISE_CONVOLUTION in @a flags for depthwise convolution with HWGo-layout filters.
enum xnn_status xnn_create_convolution2d_nhwc_f32(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t kernel_height,
  uint32_t kernel_width,
  uint32_t subsampling_height,
  uint32_t subsampling_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  uint32_t groups,
  size_t group_input_channels,
  size_t group_output_channels,
  size_t input_channel_stride,
  size_t output_channel_stride,
  const float* kernel,
  const float* bias,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* convolution_op_out);

/// Set up a 2D Convolution operator: bind input and output buffers for a batch of images.
enum xnn_status xnn_setup_convolution2d_nhwc_f32(
  xnn_operator_t convolution_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a 2D Deconvolution (transposed convolution) operator for F32 data in NHWC layout.
enum xnn_status xnn_create_deconvolution2d_nhwc_f32(
  uint32_t output_padding_top,
  uint32_t output_padding_right,
  uint32_t output_padding_bottom,
  uint32_t output_padding_left,
  uint32_t kernel_height,
  uint32_t kernel_width,
  uint32_t stride_height,
  uint32_t stride_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  uint32_t groups,
  size_t group_input_channels,
  size_t group_output_channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  const float* kernel,
  const float* bias,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* deconvolution_op_out);

/// Set up a 2D Deconvolution operator: bind input/output buffers and the per-call output size adjustment.
enum xnn_status xnn_setup_deconvolution2d_nhwc_f32(
  xnn_operator_t deconvolution_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  uint32_t adjustment_height,
  uint32_t adjustment_width,
  const float* input,
  float* output,
  pthreadpool_t threadpool);
1386 
/// Create an N-dimensional Divide operator for F32 data with output range [output_min, output_max].
enum xnn_status xnn_create_divide_nd_f32(
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* divide_op_out);

/// Set up an N-dimensional Divide operator: bind the two (possibly differently shaped) inputs and the output.
enum xnn_status xnn_setup_divide_nd_f32(
  xnn_operator_t divide_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const float* input1,
  const float* input2,
  float* output,
  pthreadpool_t threadpool);

/// Create an ELU operator for F32 data in NC layout; alpha scales negative outputs (cf. xnn_define_elu).
enum xnn_status xnn_create_elu_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  float alpha,
  uint32_t flags,
  xnn_operator_t* elu_op_out);

/// Set up an ELU operator: bind input and output buffers for a batch of elements.
enum xnn_status xnn_setup_elu_nc_f32(
  xnn_operator_t elu_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a Floor operator for F32 data in NC layout.
enum xnn_status xnn_create_floor_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* floor_op_out);

/// Set up a Floor operator: bind input and output buffers for a batch of elements.
enum xnn_status xnn_setup_floor_nc_f32(
  xnn_operator_t floor_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);
1432 
/// Create a Fully Connected operator for F32 data in NC layout.
/// Pass XNN_FLAG_TRANSPOSE_WEIGHTS in @a flags if the kernel is stored transposed.
enum xnn_status xnn_create_fully_connected_nc_f32(
  size_t input_channels,
  size_t output_channels,
  size_t input_stride,
  size_t output_stride,
  const float* kernel,
  const float* bias,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* fully_connected_op_out);

/// Set up a Fully Connected operator: bind input and output buffers for a batch of rows.
enum xnn_status xnn_setup_fully_connected_nc_f32(
  xnn_operator_t fully_connected_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a Global Average Pooling operator for F32 data in NWC layout.
enum xnn_status xnn_create_global_average_pooling_nwc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* global_average_pooling_op_out);

/// Set up a Global Average Pooling operator: bind input and output buffers for a batch.
enum xnn_status xnn_setup_global_average_pooling_nwc_f32(
  xnn_operator_t global_average_pooling_op,
  size_t batch_size,
  size_t width,
  const float* input,
  float* output,
  pthreadpool_t threadpool);
1468 
/// Create a HardSwish operator for F32 data in NC layout.
enum xnn_status xnn_create_hardswish_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* hardswish_op_out);

/// Set up a HardSwish operator: bind input and output buffers for a batch of elements.
enum xnn_status xnn_setup_hardswish_nc_f32(
  xnn_operator_t hardswish_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a Leaky ReLU operator for F32 data in NC layout; negative_slope scales negative inputs.
enum xnn_status xnn_create_leaky_relu_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  float negative_slope,
  uint32_t flags,
  xnn_operator_t* leaky_relu_op_out);

/// Set up a Leaky ReLU operator: bind input and output buffers for a batch of elements.
enum xnn_status xnn_setup_leaky_relu_nc_f32(
  xnn_operator_t leaky_relu_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);
1497 
/// Create a 2D Max Pooling operator for F32 data in NHWC layout with output range [output_min, output_max].
enum xnn_status xnn_create_max_pooling2d_nhwc_f32(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t pooling_height,
  uint32_t pooling_width,
  uint32_t stride_height,
  uint32_t stride_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* max_pooling_op_out);

/// Set up a 2D Max Pooling operator: bind input and output buffers for a batch of images.
enum xnn_status xnn_setup_max_pooling2d_nhwc_f32(
  xnn_operator_t max_pooling_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create an N-dimensional (element-wise) Maximum operator for F32 data.
enum xnn_status xnn_create_maximum_nd_f32(
  uint32_t flags,
  xnn_operator_t* maximum_op_out);

/// Set up an N-dimensional Maximum operator: bind the two (possibly differently shaped) inputs and the output.
enum xnn_status xnn_setup_maximum_nd_f32(
  xnn_operator_t maximum_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const float* input1,
  const float* input2,
  float* output,
  pthreadpool_t threadpool);

/// Create an N-dimensional (element-wise) Minimum operator for F32 data.
enum xnn_status xnn_create_minimum_nd_f32(
  uint32_t flags,
  xnn_operator_t* minimum_op_out);

/// Set up an N-dimensional Minimum operator: bind the two (possibly differently shaped) inputs and the output.
enum xnn_status xnn_setup_minimum_nd_f32(
  xnn_operator_t minimum_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const float* input1,
  const float* input2,
  float* output,
  pthreadpool_t threadpool);
1555 
/// Create an N-dimensional Multiply operator for F32 data with output range [output_min, output_max].
enum xnn_status xnn_create_multiply_nd_f32(
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* multiply_op_out);

/// Set up an N-dimensional Multiply operator: bind the two (possibly differently shaped) inputs and the output.
enum xnn_status xnn_setup_multiply_nd_f32(
  xnn_operator_t multiply_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const float* input1,
  const float* input2,
  float* output,
  pthreadpool_t threadpool);

/// Create a Negate operator for F32 data in NC layout.
enum xnn_status xnn_create_negate_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* negate_op_out);

/// Set up a Negate operator: bind input and output buffers for a batch of elements.
enum xnn_status xnn_setup_negate_nc_f32(
  xnn_operator_t negate_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a PReLU operator for F32 data in NC layout; negative_slope points to per-channel slopes.
enum xnn_status xnn_create_prelu_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  const float* negative_slope,
  uint32_t flags,
  xnn_operator_t* prelu_op_out);

/// Set up a PReLU operator: bind input and output buffers for a batch of elements.
enum xnn_status xnn_setup_prelu_nc_f32(
  xnn_operator_t prelu_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);
1601 
/// Create a 2D Bilinear Resize operator for F32 data in NCHW layout.
enum xnn_status xnn_create_resize_bilinear2d_nchw_f32(
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  uint32_t flags,
  xnn_operator_t* resize_op_out);

/// Set up a 2D Bilinear Resize (NCHW) operator: bind buffers and the input/output spatial sizes.
enum xnn_status xnn_setup_resize_bilinear2d_nchw_f32(
  xnn_operator_t resize_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  size_t output_height,
  size_t output_width,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a 2D Bilinear Resize operator for F32 data in NHWC layout.
enum xnn_status xnn_create_resize_bilinear2d_nhwc_f32(
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  uint32_t flags,
  xnn_operator_t* resize_op_out);

/// Set up a 2D Bilinear Resize (NHWC) operator: bind buffers and the input/output spatial sizes.
enum xnn_status xnn_setup_resize_bilinear2d_nhwc_f32(
  xnn_operator_t resize_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  size_t output_height,
  size_t output_width,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a Sigmoid operator for F32 data in NC layout.
enum xnn_status xnn_create_sigmoid_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* sigmoid_op_out);

/// Set up a Sigmoid operator: bind input and output buffers for a batch of elements.
enum xnn_status xnn_setup_sigmoid_nc_f32(
  xnn_operator_t sigmoid_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);
1651 
/// Create a SoftMax operator for F32 data in NC layout.
enum xnn_status xnn_create_softmax_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* softmax_op_out);

/// Set up a SoftMax operator: bind input and output buffers for a batch of rows.
enum xnn_status xnn_setup_softmax_nc_f32(
  xnn_operator_t softmax_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a Square operator for F32 data in NC layout.
enum xnn_status xnn_create_square_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* square_op_out);

/// Set up a Square operator: bind input and output buffers for a batch of elements.
enum xnn_status xnn_setup_square_nc_f32(
  xnn_operator_t square_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a Square Root operator for F32 data in NC layout.
enum xnn_status xnn_create_square_root_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* sqrt_op_out);

/// Set up a Square Root operator: bind input and output buffers for a batch of elements.
enum xnn_status xnn_setup_square_root_nc_f32(
  xnn_operator_t sqrt_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create an N-dimensional Squared Difference operator for F32 data.
enum xnn_status xnn_create_squared_difference_nd_f32(
  uint32_t flags,
  xnn_operator_t* squared_difference_op_out);

/// Set up an N-dimensional Squared Difference operator: bind the two inputs and the output.
enum xnn_status xnn_setup_squared_difference_nd_f32(
  xnn_operator_t squared_difference_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const float* input1,
  const float* input2,
  float* output,
  pthreadpool_t threadpool);
1708 
/// Create an N-dimensional Subtract operator for F32 data with output range [output_min, output_max].
enum xnn_status xnn_create_subtract_nd_f32(
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* subtract_op_out);

/// Set up an N-dimensional Subtract operator: bind the two (possibly differently shaped) inputs and the output.
enum xnn_status xnn_setup_subtract_nd_f32(
  xnn_operator_t subtract_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const float* input1,
  const float* input2,
  float* output,
  pthreadpool_t threadpool);

/// Create a Truncation (round-towards-zero) operator for F32 data in NC layout.
enum xnn_status xnn_create_truncation_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* truncation_op_out);

/// Set up a Truncation operator: bind input and output buffers for a batch of elements.
enum xnn_status xnn_setup_truncation_nc_f32(
  xnn_operator_t truncation_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);
1739 
1740 #ifndef XNN_NO_NCHW_OPERATORS
1741 
/// Create a 2D Convolution operator for F32 data in NCHW layout.
/// XNN_FLAG_INPUT_NHWC in @a flags makes the operator assume NHWC layout for the input only.
enum xnn_status xnn_create_convolution2d_nchw_f32(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t kernel_height,
  uint32_t kernel_width,
  uint32_t subsampling_height,
  uint32_t subsampling_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  uint32_t groups,
  size_t group_input_channels,
  size_t group_output_channels,
  size_t input_channel_stride,
  size_t output_channel_stride,
  const float* kernel,
  const float* bias,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* convolution_op_out);

/// Set up a 2D Convolution (NCHW) operator: bind input and output buffers for a batch of images.
enum xnn_status xnn_setup_convolution2d_nchw_f32(
  xnn_operator_t convolution_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a Global Average Pooling operator for F32 data in NCW layout.
enum xnn_status xnn_create_global_average_pooling_ncw_f32(
  size_t channels,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* global_average_pooling_op_out);

/// Set up a Global Average Pooling (NCW) operator: bind input and output buffers for a batch.
enum xnn_status xnn_setup_global_average_pooling_ncw_f32(
  xnn_operator_t global_average_pooling_op,
  size_t batch_size,
  size_t width,
  const float* input,
  float* output,
  pthreadpool_t threadpool);
1788 
1789 #endif  // XNN_NO_NCHW_OPERATORS
1790 
1791 #endif  // XNN_NO_F32_OPERATORS
1792 
1793 #ifndef XNN_NO_X32_OPERATORS
1794 
/// Create a Channel Shuffle operator for 32-bit elements in NC layout.
enum xnn_status xnn_create_channel_shuffle_nc_x32(
  size_t groups,
  size_t group_channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* channel_shuffle_op_out);

/// Set up a Channel Shuffle operator: bind input and output buffers for a batch of elements.
enum xnn_status xnn_setup_channel_shuffle_nc_x32(
  xnn_operator_t channel_shuffle_op,
  size_t batch_size,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// Create an N-dimensional Constant Pad operator for 32-bit elements; padding_value points to the fill element.
enum xnn_status xnn_create_constant_pad_nd_x32(
  const void* padding_value,
  uint32_t flags,
  xnn_operator_t* constant_pad_op_out);

/// Set up an N-dimensional Constant Pad operator: bind input/output buffers and per-dimension pre/post padding.
enum xnn_status xnn_setup_constant_pad_nd_x32(
  xnn_operator_t constant_pad_op,
  size_t num_dims,
  const size_t* input_shape,
  const size_t* pre_padding,
  const size_t* post_padding,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// Create a Copy operator for 32-bit elements in NC layout.
enum xnn_status xnn_create_copy_nc_x32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* copy_op_out);

/// Set up a Copy operator: bind input and output buffers for a batch of elements.
enum xnn_status xnn_setup_copy_nc_x32(
  xnn_operator_t copy_op,
  size_t batch_size,
  const void* input,
  void* output,
  pthreadpool_t threadpool);
1838 
/// Create a Depth-to-Space operator for 32-bit elements in NHWC layout with the given block size.
enum xnn_status xnn_create_depth_to_space_nhwc_x32(
  size_t output_channels,
  size_t input_channel_stride,
  size_t output_channel_stride,
  uint32_t block_size,
  uint32_t flags,
  xnn_operator_t* depth_to_space_op_out);

/// Set up a Depth-to-Space (NHWC) operator: bind input and output buffers for a batch of images.
enum xnn_status xnn_setup_depth_to_space_nhwc_x32(
  xnn_operator_t depth_to_space_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// Create a Depth-to-Space operator for 32-bit elements reading NCHW input and writing NHWC output.
enum xnn_status xnn_create_depth_to_space_nchw2nhwc_x32(
  size_t output_channels,
  size_t input_channel_stride,
  size_t output_channel_stride,
  uint32_t block_size,
  uint32_t flags,
  xnn_operator_t* depth_to_space_op_out);

/// Set up a Depth-to-Space (NCHW-to-NHWC) operator: bind input and output buffers for a batch of images.
enum xnn_status xnn_setup_depth_to_space_nchw2nhwc_x32(
  xnn_operator_t depth_to_space_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// Create a 2D Unpooling operator for 32-bit elements in NHWC layout (inverse of max pooling with indices).
enum xnn_status xnn_create_unpooling2d_nhwc_x32(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t pooling_height,
  uint32_t pooling_width,
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  uint32_t flags,
  xnn_operator_t* unpooling_op_out);

/// Set up a 2D Unpooling operator: bind input, the index buffer with pooling positions, and the output.
enum xnn_status xnn_setup_unpooling2d_nhwc_x32(
  xnn_operator_t unpooling_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const void* input,
  const uint32_t* index,
  void* output,
  pthreadpool_t threadpool);
1895 
1896 #endif  // XNN_NO_X32_OPERATORS
1897 
1898 #ifndef XNN_NO_F16_OPERATORS
1899 
/// Create an N-dimensional Add operator for F16 data with output range [output_min, output_max].
/// Note: the clamping range is still specified as F32 values.
enum xnn_status xnn_create_add_nd_f16(
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* add_op_out);

/// Set up an N-dimensional Add (F16) operator: bind the two (possibly differently shaped) inputs and the output.
enum xnn_status xnn_setup_add_nd_f16(
  xnn_operator_t add_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const void* input1,
  const void* input2,
  void* output,
  pthreadpool_t threadpool);

/// Create a 2D Convolution operator for F16 data in NHWC layout.
enum xnn_status xnn_create_convolution2d_nhwc_f16(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t kernel_height,
  uint32_t kernel_width,
  uint32_t subsampling_height,
  uint32_t subsampling_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  uint32_t groups,
  size_t group_input_channels,
  size_t group_output_channels,
  size_t input_channel_stride,
  size_t output_channel_stride,
  const void* kernel,
  const void* bias,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* convolution_op_out);

/// Set up a 2D Convolution (F16) operator: bind input and output buffers for a batch of images.
enum xnn_status xnn_setup_convolution2d_nhwc_f16(
  xnn_operator_t convolution_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const void* input,
  void* output,
  pthreadpool_t threadpool);
1948 
/// Create a Fully Connected operator for F16 data in NC layout.
enum xnn_status xnn_create_fully_connected_nc_f16(
  size_t input_channels,
  size_t output_channels,
  size_t input_stride,
  size_t output_stride,
  const void* kernel,
  const void* bias,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* fully_connected_op_out);

/// Set up a Fully Connected (F16) operator: bind input and output buffers for a batch of rows.
enum xnn_status xnn_setup_fully_connected_nc_f16(
  xnn_operator_t fully_connected_op,
  size_t batch_size,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// Create a Global Average Pooling operator for F16 data in NWC layout.
enum xnn_status xnn_create_global_average_pooling_nwc_f16(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* global_average_pooling_op_out);

/// Set up a Global Average Pooling (F16) operator: bind input and output buffers for a batch.
enum xnn_status xnn_setup_global_average_pooling_nwc_f16(
  xnn_operator_t global_average_pooling_op,
  size_t batch_size,
  size_t width,
  const void* input,
  void* output,
  pthreadpool_t threadpool);

/// Create a HardSwish operator for F16 data in NC layout.
enum xnn_status xnn_create_hardswish_nc_f16(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* hardswish_op_out);

/// Set up a HardSwish (F16) operator: bind input and output buffers for a batch of elements.
enum xnn_status xnn_setup_hardswish_nc_f16(
  xnn_operator_t hardswish_op,
  size_t batch_size,
  const void* input,
  void* output,
  pthreadpool_t threadpool);
1998 
/// Creates a 2D Max Pooling operator with F16 data in NHWC layout.
/// Padding/pooling/stride/dilation parameters follow the usual top/right/bottom/left and
/// height/width ordering; output values are clamped to [output_min, output_max].
enum xnn_status xnn_create_max_pooling2d_nhwc_f16(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t pooling_height,
  uint32_t pooling_width,
  uint32_t stride_height,
  uint32_t stride_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* max_pooling_op_out);

/// Sets up the F16 2D Max Pooling operator for a batch of input_height x input_width images.
enum xnn_status xnn_setup_max_pooling2d_nhwc_f16(
  xnn_operator_t max_pooling_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const void* input,
  void* output,
  pthreadpool_t threadpool);
2026 
/// Creates an N-dimensional Multiply operator with F16 data and output clamping.
enum xnn_status xnn_create_multiply_nd_f16(
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* multiply_op_out);

/// Sets up the F16 N-dimensional Multiply operator.
/// The two input shapes are passed independently, so broadcasting between them
/// is presumably supported — verify against the implementation.
enum xnn_status xnn_setup_multiply_nd_f16(
  xnn_operator_t multiply_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const void* input1,
  const void* input2,
  void* output,
  pthreadpool_t threadpool);
2043 
/// Creates a PReLU operator with F16 data in NC layout.
/// `negative_slope` points to the (per-channel) slope values applied to negative inputs.
enum xnn_status xnn_create_prelu_nc_f16(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  const void* negative_slope,
  uint32_t flags,
  xnn_operator_t* prelu_op_out);

/// Sets up the F16 PReLU operator with a batch size and input/output pointers.
enum xnn_status xnn_setup_prelu_nc_f16(
  xnn_operator_t prelu_op,
  size_t batch_size,
  const void* input,
  void* output,
  pthreadpool_t threadpool);
2058 
2059 #endif  // XNN_NO_F16_OPERATORS
2060 
2061 #ifndef XNN_NO_X16_OPERATORS
2062 
/// Creates an N-dimensional Constant Pad operator for 16-bit data (type-agnostic X16 variant).
/// `padding_value` points to the 16-bit value used to fill the padded region.
enum xnn_status xnn_create_constant_pad_nd_x16(
  const void* padding_value,
  uint32_t flags,
  xnn_operator_t* constant_pad_op_out);

/// Sets up the X16 Constant Pad operator: `pre_padding`/`post_padding` give the number of
/// padding elements before/after the data along each of the `num_dims` dimensions.
enum xnn_status xnn_setup_constant_pad_nd_x16(
  xnn_operator_t constant_pad_op,
  size_t num_dims,
  const size_t* input_shape,
  const size_t* pre_padding,
  const size_t* post_padding,
  const void* input,
  void* output,
  pthreadpool_t threadpool);
2077 
/// Creates a Copy operator for 16-bit data in NC layout (type-agnostic X16 variant).
enum xnn_status xnn_create_copy_nc_x16(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* copy_op_out);

/// Sets up the X16 Copy operator with a batch size and input/output pointers.
enum xnn_status xnn_setup_copy_nc_x16(
  xnn_operator_t copy_op,
  size_t batch_size,
  const void* input,
  void* output,
  pthreadpool_t threadpool);
2091 
2092 #endif  // XNN_NO_X16_OPERATORS
2093 
2094 #ifndef XNN_NO_QC8_OPERATORS
2095 
/// Creates a 2D Convolution operator with per-channel-quantized signed 8-bit data (QC8) in NHWC layout.
/// Unlike the QS8 variant, `kernel_scale` is a pointer to per-output-channel quantization scales
/// rather than a single scalar. `bias` holds 32-bit integer accumulator-domain biases.
/// Output values are clamped to [output_min, output_max] in the quantized domain.
enum xnn_status xnn_create_convolution2d_nhwc_qc8(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t kernel_height,
  uint32_t kernel_width,
  uint32_t subsampling_height,
  uint32_t subsampling_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  uint32_t groups,
  size_t group_input_channels,
  size_t group_output_channels,
  size_t input_channel_stride,
  size_t output_channel_stride,
  int8_t input_zero_point,
  float input_scale,
  const float* kernel_scale,
  const int8_t* kernel,
  const int32_t* bias,
  int8_t output_zero_point,
  float output_scale,
  int8_t output_min,
  int8_t output_max,
  uint32_t flags,
  xnn_operator_t* convolution_op_out);

/// Sets up the QC8 2D Convolution operator for a batch of input_height x input_width images.
enum xnn_status xnn_setup_convolution2d_nhwc_qc8(
  xnn_operator_t convolution_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const int8_t* input,
  int8_t* output,
  pthreadpool_t threadpool);
2132 
2133 #endif  // XNN_NO_QC8_OPERATORS
2134 
2135 #ifndef XNN_NO_QS8_OPERATORS
2136 
/// Creates an N-dimensional Add operator with quantized signed 8-bit (QS8) data.
/// Each tensor carries its own asymmetric quantization parameters (zero point + scale);
/// output is clamped to [output_min, output_max] in the quantized domain.
enum xnn_status xnn_create_add_nd_qs8(
  int8_t input1_zero_point,
  float input1_scale,
  int8_t input2_zero_point,
  float input2_scale,
  int8_t output_zero_point,
  float output_scale,
  int8_t output_min,
  int8_t output_max,
  uint32_t flags,
  xnn_operator_t* add_op_out);

/// Sets up the QS8 N-dimensional Add operator; the two input shapes are passed
/// independently, so broadcasting is presumably supported — verify against the implementation.
enum xnn_status xnn_setup_add_nd_qs8(
  xnn_operator_t add_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const int8_t* input1,
  const int8_t* input2,
  int8_t* output,
  pthreadpool_t threadpool);
2159 
/// Creates a 2D Convolution operator with quantized signed 8-bit (QS8) data in NHWC layout.
/// `kernel_scale` is a single per-tensor scale (contrast with the per-channel QC8 variant);
/// `bias` holds 32-bit integer accumulator-domain biases.
enum xnn_status xnn_create_convolution2d_nhwc_qs8(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t kernel_height,
  uint32_t kernel_width,
  uint32_t subsampling_height,
  uint32_t subsampling_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  uint32_t groups,
  size_t group_input_channels,
  size_t group_output_channels,
  size_t input_channel_stride,
  size_t output_channel_stride,
  int8_t input_zero_point,
  float input_scale,
  float kernel_scale,
  const int8_t* kernel,
  const int32_t* bias,
  int8_t output_zero_point,
  float output_scale,
  int8_t output_min,
  int8_t output_max,
  uint32_t flags,
  xnn_operator_t* convolution_op_out);

/// Sets up the QS8 2D Convolution operator for a batch of input_height x input_width images.
enum xnn_status xnn_setup_convolution2d_nhwc_qs8(
  xnn_operator_t convolution_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const int8_t* input,
  int8_t* output,
  pthreadpool_t threadpool);
2196 
/// Creates a 2D Deconvolution (transposed convolution) operator with QS8 data in NHWC layout.
/// Note: padding parameters here describe OUTPUT padding (cropping of the output),
/// per their `output_padding_*` names.
enum xnn_status xnn_create_deconvolution2d_nhwc_qs8(
  uint32_t output_padding_top,
  uint32_t output_padding_right,
  uint32_t output_padding_bottom,
  uint32_t output_padding_left,
  uint32_t kernel_height,
  uint32_t kernel_width,
  uint32_t stride_height,
  uint32_t stride_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  uint32_t groups,
  size_t group_input_channels,
  size_t group_output_channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  int8_t input_zero_point,
  float input_scale,
  float kernel_scale,
  const int8_t* kernel,
  const int32_t* bias,
  int8_t output_zero_point,
  float output_scale,
  int8_t output_min,
  int8_t output_max,
  uint32_t flags,
  xnn_operator_t* deconvolution_op_out);

/// Sets up the QS8 2D Deconvolution operator. `adjustment_height`/`adjustment_width`
/// are extra output-size adjustments (used to disambiguate output size for strided deconvolution).
enum xnn_status xnn_setup_deconvolution2d_nhwc_qs8(
  xnn_operator_t deconvolution_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  uint32_t adjustment_height,
  uint32_t adjustment_width,
  const int8_t* input,
  int8_t* output,
  pthreadpool_t threadpool);
2235 
/// Creates an ELU operator with QS8 data in NC layout.
/// `alpha` scales the exponential (negative-input) branch of the ELU function.
enum xnn_status xnn_create_elu_nc_qs8(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  float alpha,
  int8_t input_zero_point,
  float input_scale,
  int8_t output_zero_point,
  float output_scale,
  int8_t output_min,
  int8_t output_max,
  uint32_t flags,
  xnn_operator_t* elu_op_out);

/// Sets up the QS8 ELU operator with a batch size and input/output pointers.
enum xnn_status xnn_setup_elu_nc_qs8(
  xnn_operator_t elu_op,
  size_t batch_size,
  const int8_t* input,
  int8_t* output,
  pthreadpool_t threadpool);
2256 
/// Creates a Fully Connected operator with QS8 data in NC layout.
/// `kernel_scale` is a single per-tensor scale; `bias` holds 32-bit integer biases.
enum xnn_status xnn_create_fully_connected_nc_qs8(
  size_t input_channels,
  size_t output_channels,
  size_t input_stride,
  size_t output_stride,
  int8_t input_zero_point,
  float input_scale,
  float kernel_scale,
  const int8_t* kernel,
  const int32_t* bias,
  int8_t output_zero_point,
  float output_scale,
  int8_t output_min,
  int8_t output_max,
  uint32_t flags,
  xnn_operator_t* fully_connected_op_out);

/// Sets up the QS8 Fully Connected operator with a batch size and input/output pointers.
enum xnn_status xnn_setup_fully_connected_nc_qs8(
  xnn_operator_t fully_connected_op,
  size_t batch_size,
  const int8_t* input,
  int8_t* output,
  pthreadpool_t threadpool);
2280 
/// Creates a Global Average Pooling operator with QS8 data in NWC layout.
enum xnn_status xnn_create_global_average_pooling_nwc_qs8(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  int8_t input_zero_point,
  float input_scale,
  int8_t output_zero_point,
  float output_scale,
  int8_t output_min,
  int8_t output_max,
  uint32_t flags,
  xnn_operator_t* global_average_pooling_op_out);

/// Sets up the QS8 Global Average Pooling operator for a batch of width-`width` inputs.
enum xnn_status xnn_setup_global_average_pooling_nwc_qs8(
  xnn_operator_t global_average_pooling_op,
  size_t batch_size,
  size_t width,
  const int8_t* input,
  int8_t* output,
  pthreadpool_t threadpool);
2301 
/// Creates an N-dimensional Multiply operator with QS8 data; each tensor carries its own
/// asymmetric quantization parameters (zero point + scale).
enum xnn_status xnn_create_multiply_nd_qs8(
  int8_t input1_zero_point,
  float input1_scale,
  int8_t input2_zero_point,
  float input2_scale,
  int8_t output_zero_point,
  float output_scale,
  int8_t output_min,
  int8_t output_max,
  uint32_t flags,
  xnn_operator_t* multiply_op_out);

/// Sets up the QS8 N-dimensional Multiply operator; the two input shapes are passed
/// independently, so broadcasting is presumably supported — verify against the implementation.
enum xnn_status xnn_setup_multiply_nd_qs8(
  xnn_operator_t multiply_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const int8_t* input1,
  const int8_t* input2,
  int8_t* output,
  pthreadpool_t threadpool);
2324 
/// Creates a Sigmoid operator with QS8 data in NC layout.
enum xnn_status xnn_create_sigmoid_nc_qs8(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  int8_t input_zero_point,
  float input_scale,
  int8_t output_zero_point,
  float output_scale,
  int8_t output_min,
  int8_t output_max,
  uint32_t flags,
  xnn_operator_t* sigmoid_op_out);

/// Sets up the QS8 Sigmoid operator with a batch size and input/output pointers.
enum xnn_status xnn_setup_sigmoid_nc_qs8(
  xnn_operator_t sigmoid_op,
  size_t batch_size,
  const int8_t* input,
  int8_t* output,
  pthreadpool_t threadpool);
2344 
/// Creates an N-dimensional Subtract operator with QS8 data (computes input1 - input2,
/// per the parameter ordering; verify direction against the implementation).
enum xnn_status xnn_create_subtract_nd_qs8(
  int8_t input1_zero_point,
  float input1_scale,
  int8_t input2_zero_point,
  float input2_scale,
  int8_t output_zero_point,
  float output_scale,
  int8_t output_min,
  int8_t output_max,
  uint32_t flags,
  xnn_operator_t* subtract_op_out);

/// Sets up the QS8 N-dimensional Subtract operator with independent input shapes.
enum xnn_status xnn_setup_subtract_nd_qs8(
  xnn_operator_t subtract_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const int8_t* input1,
  const int8_t* input2,
  int8_t* output,
  pthreadpool_t threadpool);
2367 
/// Creates a TanH operator with QS8 data in NC layout.
enum xnn_status xnn_create_tanh_nc_qs8(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  int8_t input_zero_point,
  float input_scale,
  int8_t output_zero_point,
  float output_scale,
  int8_t output_min,
  int8_t output_max,
  uint32_t flags,
  xnn_operator_t* tanh_op_out);

/// Sets up the QS8 TanH operator with a batch size and input/output pointers.
enum xnn_status xnn_setup_tanh_nc_qs8(
  xnn_operator_t tanh_op,
  size_t batch_size,
  const int8_t* input,
  int8_t* output,
  pthreadpool_t threadpool);
2387 
2388 #endif  // XNN_NO_QS8_OPERATORS
2389 
2390 #ifndef XNN_NO_QU8_OPERATORS
2391 
/// Creates an N-dimensional Add operator with quantized unsigned 8-bit (QU8) data.
/// Each tensor carries its own asymmetric quantization parameters (zero point + scale).
enum xnn_status xnn_create_add_nd_qu8(
  uint8_t input1_zero_point,
  float input1_scale,
  uint8_t input2_zero_point,
  float input2_scale,
  uint8_t output_zero_point,
  float output_scale,
  uint8_t output_min,
  uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* add_op_out);

/// Sets up the QU8 N-dimensional Add operator with independent input shapes.
enum xnn_status xnn_setup_add_nd_qu8(
  xnn_operator_t add_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const uint8_t* input1,
  const uint8_t* input2,
  uint8_t* output,
  pthreadpool_t threadpool);
2414 
/// Creates a 2D Average Pooling operator with QU8 data in NHWC layout.
/// Note: unlike Max Pooling, this operator has no dilation parameters.
enum xnn_status xnn_create_average_pooling2d_nhwc_qu8(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t pooling_height,
  uint32_t pooling_width,
  uint32_t stride_height,
  uint32_t stride_width,
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  uint8_t input_zero_point,
  float input_scale,
  uint8_t output_zero_point,
  float output_scale,
  uint8_t output_min,
  uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* average_pooling_op_out);

/// Sets up the QU8 2D Average Pooling operator for a batch of input_height x input_width images.
enum xnn_status xnn_setup_average_pooling2d_nhwc_qu8(
  xnn_operator_t average_pooling_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);
2444 
/// Creates a 2D Convolution operator with QU8 data in NHWC layout.
/// Unlike the QS8 variant, the filter also carries a `kernel_zero_point`
/// (asymmetric weight quantization).
enum xnn_status xnn_create_convolution2d_nhwc_qu8(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t kernel_height,
  uint32_t kernel_width,
  uint32_t subsampling_height,
  uint32_t subsampling_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  uint32_t groups,
  size_t group_input_channels,
  size_t group_output_channels,
  size_t input_channel_stride,
  size_t output_channel_stride,
  uint8_t input_zero_point,
  float input_scale,
  uint8_t kernel_zero_point,
  float kernel_scale,
  const uint8_t* kernel,
  const int32_t* bias,
  uint8_t output_zero_point,
  float output_scale,
  uint8_t output_min,
  uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* convolution_op_out);

/// Sets up the QU8 2D Convolution operator for a batch of input_height x input_width images.
enum xnn_status xnn_setup_convolution2d_nhwc_qu8(
  xnn_operator_t convolution_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);
2482 
/// Creates a 2D Deconvolution (transposed convolution) operator with QU8 data in NHWC layout.
/// Padding parameters describe OUTPUT padding (cropping), per their `output_padding_*` names;
/// the filter carries a `kernel_zero_point` for asymmetric weight quantization.
enum xnn_status xnn_create_deconvolution2d_nhwc_qu8(
  uint32_t output_padding_top,
  uint32_t output_padding_right,
  uint32_t output_padding_bottom,
  uint32_t output_padding_left,
  uint32_t kernel_height,
  uint32_t kernel_width,
  uint32_t stride_height,
  uint32_t stride_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  uint32_t groups,
  size_t group_input_channels,
  size_t group_output_channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  uint8_t input_zero_point,
  float input_scale,
  uint8_t kernel_zero_point,
  float kernel_scale,
  const uint8_t* kernel,
  const int32_t* bias,
  uint8_t output_zero_point,
  float output_scale,
  uint8_t output_min,
  uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* deconvolution_op_out);

/// Sets up the QU8 2D Deconvolution operator. `adjustment_height`/`adjustment_width`
/// are extra output-size adjustments for strided deconvolution.
enum xnn_status xnn_setup_deconvolution2d_nhwc_qu8(
  xnn_operator_t deconvolution_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  uint32_t adjustment_height,
  uint32_t adjustment_width,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);
2522 
/// Creates a Fully Connected operator with QU8 data in NC layout.
/// The filter carries both `kernel_zero_point` and `kernel_scale` (asymmetric weight quantization).
enum xnn_status xnn_create_fully_connected_nc_qu8(
  size_t input_channels,
  size_t output_channels,
  size_t input_stride,
  size_t output_stride,
  uint8_t input_zero_point,
  float input_scale,
  uint8_t kernel_zero_point,
  float kernel_scale,
  const uint8_t* kernel,
  const int32_t* bias,
  uint8_t output_zero_point,
  float output_scale,
  uint8_t output_min,
  uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* fully_connected_op_out);

/// Sets up the QU8 Fully Connected operator with a batch size and input/output pointers.
enum xnn_status xnn_setup_fully_connected_nc_qu8(
  xnn_operator_t fully_connected_op,
  size_t batch_size,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);
2547 
/// Creates a Global Average Pooling operator with QU8 data in NWC layout.
enum xnn_status xnn_create_global_average_pooling_nwc_qu8(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint8_t input_zero_point,
  float input_scale,
  uint8_t output_zero_point,
  float output_scale,
  uint8_t output_min,
  uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* global_average_pooling_op_out);

/// Sets up the QU8 Global Average Pooling operator for a batch of width-`width` inputs.
enum xnn_status xnn_setup_global_average_pooling_nwc_qu8(
  xnn_operator_t global_average_pooling_op,
  size_t batch_size,
  size_t width,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);
2568 
/// Creates a Leaky ReLU operator with QU8 data in NC layout.
/// `negative_slope` is the (real-valued) multiplier applied to negative inputs.
enum xnn_status xnn_create_leaky_relu_nc_qu8(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  float negative_slope,
  uint8_t input_zero_point,
  float input_scale,
  uint8_t output_zero_point,
  float output_scale,
  uint8_t output_min,
  uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* leaky_relu_op_out);

/// Sets up the QU8 Leaky ReLU operator with a batch size and input/output pointers.
enum xnn_status xnn_setup_leaky_relu_nc_qu8(
  xnn_operator_t leaky_relu_op,
  size_t batch_size,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);
2589 
/// Creates an N-dimensional Multiply operator with QU8 data; each tensor carries its own
/// asymmetric quantization parameters (zero point + scale).
enum xnn_status xnn_create_multiply_nd_qu8(
  uint8_t input1_zero_point,
  float input1_scale,
  uint8_t input2_zero_point,
  float input2_scale,
  uint8_t output_zero_point,
  float output_scale,
  uint8_t output_min,
  uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* multiply_op_out);

/// Sets up the QU8 N-dimensional Multiply operator with independent input shapes.
enum xnn_status xnn_setup_multiply_nd_qu8(
  xnn_operator_t multiply_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const uint8_t* input1,
  const uint8_t* input2,
  uint8_t* output,
  pthreadpool_t threadpool);
2612 
/// Creates a Sigmoid operator with QU8 data in NC layout.
enum xnn_status xnn_create_sigmoid_nc_qu8(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint8_t input_zero_point,
  float input_scale,
  uint8_t output_zero_point,
  float output_scale,
  uint8_t output_min,
  uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* sigmoid_op_out);

/// Sets up the QU8 Sigmoid operator with a batch size and input/output pointers.
enum xnn_status xnn_setup_sigmoid_nc_qu8(
  xnn_operator_t sigmoid_op,
  size_t batch_size,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);
2632 
/// Creates a SoftMax operator with QU8 data in NC layout.
/// Note: no input zero point parameter is taken (only `input_scale`), and no
/// output_min/output_max clamping range.
enum xnn_status xnn_create_softmax_nc_qu8(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  float input_scale,
  uint8_t output_zero_point,
  float output_scale,
  uint32_t flags,
  xnn_operator_t* softmax_op_out);

/// Sets up the QU8 SoftMax operator with a batch size and input/output pointers.
enum xnn_status xnn_setup_softmax_nc_qu8(
  xnn_operator_t softmax_op,
  size_t batch_size,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);
2649 
/// Creates an N-dimensional Subtract operator with QU8 data (computes input1 - input2,
/// per the parameter ordering; verify direction against the implementation).
enum xnn_status xnn_create_subtract_nd_qu8(
  uint8_t input1_zero_point,
  float input1_scale,
  uint8_t input2_zero_point,
  float input2_scale,
  uint8_t output_zero_point,
  float output_scale,
  uint8_t output_min,
  uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* subtract_op_out);

/// Sets up the QU8 N-dimensional Subtract operator with independent input shapes.
enum xnn_status xnn_setup_subtract_nd_qu8(
  xnn_operator_t subtract_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const uint8_t* input1,
  const uint8_t* input2,
  uint8_t* output,
  pthreadpool_t threadpool);
2672 
/// Creates a TanH operator with QU8 data in NC layout.
enum xnn_status xnn_create_tanh_nc_qu8(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint8_t input_zero_point,
  float input_scale,
  uint8_t output_zero_point,
  float output_scale,
  uint8_t output_min,
  uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* tanh_op_out);

/// Sets up the QU8 TanH operator with a batch size and input/output pointers.
enum xnn_status xnn_setup_tanh_nc_qu8(
  xnn_operator_t tanh_op,
  size_t batch_size,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);
2692 
2693 #endif  // XNN_NO_QU8_OPERATORS
2694 
2695 #ifndef XNN_NO_S8_OPERATORS
2696 
/// Creates a Clamp operator for signed 8-bit data in NC layout,
/// restricting values to [output_min, output_max].
enum xnn_status xnn_create_clamp_nc_s8(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  int8_t output_min,
  int8_t output_max,
  uint32_t flags,
  xnn_operator_t* clamp_op_out);

/// Sets up the S8 Clamp operator with a batch size and input/output pointers.
enum xnn_status xnn_setup_clamp_nc_s8(
  xnn_operator_t clamp_op,
  size_t batch_size,
  const int8_t* input,
  int8_t* output,
  pthreadpool_t threadpool);
2712 
/// Creates a 2D Max Pooling operator with signed 8-bit data in NHWC layout.
/// Max pooling needs no quantization parameters; output is clamped to [output_min, output_max].
enum xnn_status xnn_create_max_pooling2d_nhwc_s8(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t pooling_height,
  uint32_t pooling_width,
  uint32_t stride_height,
  uint32_t stride_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  int8_t output_min,
  int8_t output_max,
  uint32_t flags,
  xnn_operator_t* max_pooling_op_out);

/// Sets up the S8 2D Max Pooling operator for a batch of input_height x input_width images.
enum xnn_status xnn_setup_max_pooling2d_nhwc_s8(
  xnn_operator_t max_pooling_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const int8_t* input,
  int8_t* output,
  pthreadpool_t threadpool);
2740 
/// Creates a 2D Bilinear Resize operator with signed 8-bit data in NHWC layout.
enum xnn_status xnn_create_resize_bilinear2d_nhwc_s8(
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  uint32_t flags,
  xnn_operator_t* resize_op_out);

/// Sets up the S8 2D Bilinear Resize operator; both input and output spatial sizes
/// are specified explicitly at setup time.
enum xnn_status xnn_setup_resize_bilinear2d_nhwc_s8(
  xnn_operator_t resize_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  size_t output_height,
  size_t output_width,
  const int8_t* input,
  int8_t* output,
  pthreadpool_t threadpool);
2758 
2759 #endif  // XNN_NO_S8_OPERATORS
2760 
2761 #ifndef XNN_NO_U8_OPERATORS
2762 
/// Creates a Clamp operator for unsigned 8-bit data in NC layout,
/// restricting values to [output_min, output_max].
enum xnn_status xnn_create_clamp_nc_u8(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint8_t output_min,
  uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* clamp_op_out);

/// Sets up the U8 Clamp operator with a batch size and input/output pointers.
enum xnn_status xnn_setup_clamp_nc_u8(
  xnn_operator_t clamp_op,
  size_t batch_size,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);
2778 
/// Creates a 2D Max Pooling operator with unsigned 8-bit data in NHWC layout.
/// Max pooling needs no quantization parameters; output is clamped to [output_min, output_max].
enum xnn_status xnn_create_max_pooling2d_nhwc_u8(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t pooling_height,
  uint32_t pooling_width,
  uint32_t stride_height,
  uint32_t stride_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  uint8_t output_min,
  uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* max_pooling_op_out);

/// Sets up the U8 2D Max Pooling operator for a batch of input_height x input_width images.
enum xnn_status xnn_setup_max_pooling2d_nhwc_u8(
  xnn_operator_t max_pooling_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);
2806 
/// Creates a 2D Bilinear Resize operator with unsigned 8-bit data in NHWC layout.
enum xnn_status xnn_create_resize_bilinear2d_nhwc_u8(
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  uint32_t flags,
  xnn_operator_t* resize_op_out);

/// Sets up the U8 2D Bilinear Resize operator; both input and output spatial sizes
/// are specified explicitly at setup time.
enum xnn_status xnn_setup_resize_bilinear2d_nhwc_u8(
  xnn_operator_t resize_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  size_t output_height,
  size_t output_width,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);
2824 
2825 #endif  // XNN_NO_U8_OPERATORS
2826 
2827 #ifndef XNN_NO_X8_OPERATORS
2828 
/// Creates a Copy operator for 8-bit data in NC layout (type-agnostic X8 variant).
enum xnn_status xnn_create_copy_nc_x8(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* copy_op_out);

/// Sets up the X8 Copy operator with a batch size and input/output pointers.
enum xnn_status xnn_setup_copy_nc_x8(
  xnn_operator_t copy_op,
  size_t batch_size,
  const void* input,
  void* output,
  pthreadpool_t threadpool);
2842 
/// Creates a Channel Shuffle operator for 8-bit data in NC layout
/// (channels are organized as `groups` x `group_channels`).
enum xnn_status xnn_create_channel_shuffle_nc_x8(
  size_t groups,
  size_t group_channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* channel_shuffle_op_out);

/// Sets up the X8 Channel Shuffle operator with a batch size and input/output pointers.
enum xnn_status xnn_setup_channel_shuffle_nc_x8(
  xnn_operator_t channel_shuffle_op,
  size_t batch_size,
  const void* input,
  void* output,
  pthreadpool_t threadpool);
2857 
/// Creates an N-dimensional Constant Pad operator for 8-bit data (type-agnostic X8 variant).
/// `padding_value` points to the 8-bit value used to fill the padded region.
enum xnn_status xnn_create_constant_pad_nd_x8(
  const void* padding_value,
  uint32_t flags,
  xnn_operator_t* constant_pad_op_out);

/// Sets up the X8 Constant Pad operator: `pre_padding`/`post_padding` give the number of
/// padding elements before/after the data along each of the `num_dims` dimensions.
enum xnn_status xnn_setup_constant_pad_nd_x8(
  xnn_operator_t constant_pad_op,
  size_t num_dims,
  const size_t* input_shape,
  const size_t* pre_padding,
  const size_t* post_padding,
  const void* input,
  void* output,
  pthreadpool_t threadpool);
2872 
2873 #endif  // XNN_NO_X8_OPERATORS
2874 
2875 #ifndef XNN_NO_CVT_OPERATORS
2876 
/// Creates a Convert operator from F16 (half-precision) to F32 input in NC layout.
enum xnn_status xnn_create_convert_nc_f16_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* convert_op_out);

/// Sets up the F16->F32 Convert operator; `input` points to F16 data, `output` to F32.
enum xnn_status xnn_setup_convert_nc_f16_f32(
  xnn_operator_t convert_op,
  size_t batch_size,
  const void* input,
  float* output,
  pthreadpool_t threadpool);
2890 
/// Creates a Convert operator from F32 to F16 (half-precision) in NC layout.
enum xnn_status xnn_create_convert_nc_f32_f16(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* convert_op_out);

/// Sets up the F32->F16 Convert operator; `input` points to F32 data, `output` to F16.
enum xnn_status xnn_setup_convert_nc_f32_f16(
  xnn_operator_t convert_op,
  size_t batch_size,
  const float* input,
  void* output,
  pthreadpool_t threadpool);
2904 
/// Creates a Quantize operator from F32 to QS8 in NC layout using the given
/// output scale/zero-point; quantized output is clamped to [output_min, output_max].
enum xnn_status xnn_create_convert_nc_f32_qs8(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  float output_scale,
  int8_t output_zero_point,
  int8_t output_min,
  int8_t output_max,
  uint32_t flags,
  xnn_operator_t* convert_op_out);

/// Sets up the F32->QS8 Convert operator with a batch size and input/output pointers.
enum xnn_status xnn_setup_convert_nc_f32_qs8(
  xnn_operator_t convert_op,
  size_t batch_size,
  const float* input,
  int8_t* output,
  pthreadpool_t threadpool);
2922 
/// Creates a Quantize operator from F32 to QU8 in NC layout using the given
/// output scale/zero-point; quantized output is clamped to [output_min, output_max].
enum xnn_status xnn_create_convert_nc_f32_qu8(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  float output_scale,
  uint8_t output_zero_point,
  uint8_t output_min,
  uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* convert_op_out);

/// Sets up the F32->QU8 Convert operator with a batch size and input/output pointers.
enum xnn_status xnn_setup_convert_nc_f32_qu8(
  xnn_operator_t convert_op,
  size_t batch_size,
  const float* input,
  uint8_t* output,
  pthreadpool_t threadpool);
2940 
/// Creates a Dequantize operator from QS8 to F32 in NC layout using the given
/// input scale/zero-point.
enum xnn_status xnn_create_convert_nc_qs8_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  float input_scale,
  int8_t input_zero_point,
  uint32_t flags,
  xnn_operator_t* convert_op_out);

/// Sets up the QS8->F32 Convert operator with a batch size and input/output pointers.
enum xnn_status xnn_setup_convert_nc_qs8_f32(
  xnn_operator_t convert_op,
  size_t batch_size,
  const int8_t* input,
  float* output,
  pthreadpool_t threadpool);
2956 
/// Creates a Dequantize operator from QU8 to F32 in NC layout using the given
/// input scale/zero-point.
enum xnn_status xnn_create_convert_nc_qu8_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  float input_scale,
  uint8_t input_zero_point,
  uint32_t flags,
  xnn_operator_t* convert_op_out);

/// Sets up the QU8->F32 Convert operator with a batch size and input/output pointers.
enum xnn_status xnn_setup_convert_nc_qu8_f32(
  xnn_operator_t convert_op,
  size_t batch_size,
  const uint8_t* input,
  float* output,
  pthreadpool_t threadpool);
2972 
2973 #endif  // XNN_NO_CVT_OPERATORS
2974 
2975 #ifdef __cplusplus
2976 }  // extern "C"
2977 #endif
2978