// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#include <pthreadpool.h>

#ifdef __cplusplus
extern "C" {
#endif
20 
/// The number of bytes XNNPACK may read beyond array bounds.
/// The caller must allocate at least this many extra bytes after the tensor data passed to XNNPACK.
///
/// Note: XNNPACK reads, but never writes beyond array bounds.
#define XNN_EXTRA_BYTES 16

/// Maximum number of dimensions in tensor shape.
#define XNN_MAX_TENSOR_DIMS 6

/// The convolution operator represents a depthwise convolution, and uses HWGo layout for filters.
#define XNN_FLAG_DEPTHWISE_CONVOLUTION 0x00000001

/// Assume transposed weights in a fully connected operator.
/// Note: flag values are reused across operators that accept disjoint flag sets
/// (e.g. this value equals XNN_FLAG_DEPTHWISE_CONVOLUTION); flags are interpreted per operator.
#define XNN_FLAG_TRANSPOSE_WEIGHTS 0x00000001

/// The operator assumes NHWC layout for the input, regardless of the output layout.
#define XNN_FLAG_INPUT_NHWC 0x00000002

/// Match "SAME" padding in TensorFlow. Exact padding values are computed dynamically depending on input size.
#define XNN_FLAG_TENSORFLOW_SAME_PADDING 0x00000004

/// Match behaviour of TensorFlow 1.x.
/// Note: shares its value with XNN_FLAG_TENSORFLOW_SAME_PADDING; the two are used by different operators.
#define XNN_FLAG_TENSORFLOW_LEGACY_MODE 0x00000004

/// Align corners of input and output images in resize operations.
#define XNN_FLAG_ALIGN_CORNERS 0x00000008
47 
/// Status code for any XNNPACK function call.
enum xnn_status {
  /// The call succeeded, and all output arguments now contain valid data.
  xnn_status_success = 0,
  /// XNNPACK was not initialized (xnn_initialize was not called, or did not succeed).
  xnn_status_uninitialized = 1,
  /// An argument passed to the call was invalid.
  xnn_status_invalid_parameter = 2,
  /// The object the call operates on is in a state incompatible with the requested operation.
  xnn_status_invalid_state = 3,
  /// The requested combination of parameters is not supported by the implementation.
  xnn_status_unsupported_parameter = 4,
  /// The host processor does not satisfy XNNPACK's minimum hardware requirements.
  xnn_status_unsupported_hardware = 5,
  /// The call failed due to an out-of-memory condition.
  xnn_status_out_of_memory = 6,
};
59 
/// Set of user-provided memory management functions used by XNNPACK in place of the system allocator.
/// See @ref xnn_initialize.
struct xnn_allocator {
  /// User-specified pointer that will be passed as-is to all functions in this structure.
  void* context;
  /// Pointer to a function to be called for general memory allocation.
  ///
  /// @param context - The user-specified pointer from xnn_allocator structure.
  /// @param size - The size of the memory block to allocate, in bytes.
  ///
  /// @returns Pointer to the allocated memory block of at least @ref size bytes.
  ///          If allocation fails, the function must return NULL.
  void* (*allocate)(void* context, size_t size);
  /// Pointer to a function to be called for general memory re-allocation, i.e. to increase or shrink a previously
  /// allocated memory block. The content of the old memory block is copied to the new memory block.
  ///
  /// @param context - The user-specified pointer from xnn_allocator structure.
  /// @param pointer - Pointer to a memory block allocated by @ref allocate or @ref reallocate functions. Can be NULL.
  ///                  If the pointer is NULL, the @ref reallocate call is equivalent to an @ref allocate call.
  /// @param size - The new size of the memory block to allocate, in bytes.
  ///
  /// @returns Pointer to the newly allocated memory block of at least @ref size bytes with the content of the previous
  ///          memory block.
  ///          If allocation fails, the function must return NULL, but must not release the previous memory block.
  void* (*reallocate)(void* context, void* pointer, size_t size);
  /// Pointer to a function to be called for general memory de-allocation.
  ///
  /// @param context - The user-specified pointer from xnn_allocator structure.
  /// @param pointer - Pointer to a memory block allocated by @ref allocate or @ref reallocate functions. Can be NULL.
  ///                  If the pointer is NULL, the @ref deallocate call is a no-op.
  void (*deallocate)(void* context, void* pointer);
  /// Pointer to a function to be called for aligned memory allocation.
  ///
  /// @param context - The user-specified pointer from xnn_allocator structure.
  /// @param alignment - The alignment of the memory block to allocate, in bytes. Alignment is always a power-of-2.
  /// @param size - The size of the memory block to allocate, in bytes.
  ///
  /// @returns Pointer to the allocated memory block of at least @ref size bytes.
  ///          If allocation fails, the function must return NULL.
  void* (*aligned_allocate)(void* context, size_t alignment, size_t size);
  /// Pointer to a function to be called for aligned memory de-allocation.
  ///
  /// @param context - The user-specified pointer from xnn_allocator structure.
  /// @param pointer - Pointer to a memory block allocated by @ref aligned_allocate function. Can be NULL.
  ///                  If the pointer is NULL, the @ref aligned_deallocate call is a no-op.
  void (*aligned_deallocate)(void* context, void* pointer);
};
105 
/// Initialize XNNPACK library.
///
/// XNNPACK must be successfully initialized before use.
/// During initialization, XNNPACK populates internal structures depending on host processor. It can be time-consuming.
///
/// @param[in] allocator - structure with function pointers to be used for memory allocation and de-allocation.
///                        If this argument is NULL, system-provided memory management functions (e.g. malloc/free)
///                        will be used.
///
/// @retval xnn_status_success - XNNPACK is successfully initialized and ready to use.
/// @retval xnn_status_out_of_memory - initialization failed due to out-of-memory condition.
/// @retval xnn_status_unsupported_hardware - initialization failed because the host processor does not satisfy the
///                                           minimum hardware requirements for XNNPACK. E.g. this may happen on x86
///                                           processors without SSE2 extension, or on 32-bit ARM processors without
///                                           the NEON SIMD extension.
enum xnn_status xnn_initialize(const struct xnn_allocator* allocator);

/// Deinitialize XNNPACK library.
///
/// To avoid memory and resource leaks, users must call xnn_deinitialize once for each successful xnn_initialize call.
///
/// @retval xnn_status_success - deinitialization call succeeded.
enum xnn_status xnn_deinitialize(void);
129 
/// Subgraph is an abstract representation of a neural network model.
/// Subgraph objects are used to define Values (tensors) and Nodes (operators) comprising the model.
typedef struct xnn_subgraph* xnn_subgraph_t;

/// Create an empty Subgraph object.
///
/// @param external_value_ids - number of Value IDs to reserve for communication with external graph representation.
///                             The Subgraph object would avoid creating internal Value IDs in the
///                             [0, external_value_ids-1] range.
/// @param flags - binary features of the subgraph. No supported flags are currently defined.
/// @param subgraph_out - pointer to the variable that will be initialized with a handle to the Subgraph object upon
///                       successful return.
enum xnn_status xnn_create_subgraph(
  uint32_t external_value_ids,
  uint32_t flags,
  xnn_subgraph_t* subgraph_out);

/// Destroy a Subgraph object, as well as Values, and Nodes associated with the subgraph.
///
/// @param subgraph - the Subgraph object to destroy.
enum xnn_status xnn_delete_subgraph(
  xnn_subgraph_t subgraph);
152 
/// The Value is an external input of the Subgraph: its data is supplied by the caller (see xnn_setup_runtime).
#define XNN_VALUE_FLAG_EXTERNAL_INPUT  0x00000001
/// The Value is an external output of the Subgraph: its data is read back by the caller (see xnn_setup_runtime).
#define XNN_VALUE_FLAG_EXTERNAL_OUTPUT 0x00000002

/// Sentinel Value ID. Passed as external_id to xnn_define_tensor_value to request an internally created ID.
#define XNN_INVALID_VALUE_ID UINT32_MAX

/// Type of elements in a Value object.
enum xnn_datatype {
  /// Invalid data type. Valid Values never have this datatype.
  xnn_datatype_invalid = 0,
  /// IEEE754 single-precision floating-point.
  xnn_datatype_fp32 = 1,
  /// IEEE754 half-precision floating-point.
  xnn_datatype_fp16 = 2,
};
167 
/// Define a tensor-type Value and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Value.
/// @param datatype - type of the tensor elements.
/// @param num_dims - number of dimensions in the shape.
/// @param dims - pointer to an array of @a num_dims shape dimensions. If num_dims is 0, this pointer can be NULL.
///               XNNPACK does not keep any pointers to this array after the function returns.
/// @param data - pointer to static data used for tensor initialization. If the tensor is not statically initialized,
///               this pointer must be NULL. If non-NULL, the life-time of the static data must exceed the life-time
///               of the Subgraph object, and of any Runtime objects created from the Subgraph.
/// @param external_id - external ID for the Value. The ID must be within the range of reserved Value IDs specified on
///                      the Subgraph creation. If the external ID is XNN_INVALID_VALUE_ID, an internal ID will be
///                      created for the Value.
/// @param flags - binary features of the Value. Supported values are any combination of XNN_VALUE_FLAG_EXTERNAL_INPUT
///                and XNN_VALUE_FLAG_EXTERNAL_OUTPUT.
/// @param id_out - pointer to the variable that will be initialized with the Value ID upon successful return. If a
///                 valid @a external_id was provided, the variable will be initialized with the @a external_id value.
enum xnn_status xnn_define_tensor_value(
  xnn_subgraph_t subgraph,
  enum xnn_datatype datatype,
  size_t num_dims,
  const size_t* dims,
  const void* data,
  uint32_t external_id,
  uint32_t flags,
  uint32_t* id_out);
194 
/// Define a 2D Convolution Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param input_padding_top - implicit zero-padding above 2D input data.
/// @param input_padding_right - implicit zero-padding to the right of 2D input data.
/// @param input_padding_bottom - implicit zero-padding below 2D input data.
/// @param input_padding_left - implicit zero-padding to the left of 2D input data.
/// @param kernel_height - kernel (filter) height.
/// @param kernel_width - kernel (filter) width.
/// @param subsampling_height - height of subsampling region for convolution output (convolution height stride).
/// @param subsampling_width - width of subsampling region for convolution output (convolution width stride).
/// @param dilation_height - dilation of kernel elements along the height dimension.
/// @param dilation_width - dilation of kernel elements along the width dimension.
/// @param groups - number of convolution groups.
/// @param group_input_channels - number of input channels per group.
/// @param group_output_channels - number of output channels per group.
/// @param output_min - lower bound for clipping output values.
/// @param output_max - upper bound for clipping output values.
/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
///                   with [N, IH, IW, groups * group_input_channels] dimensions.
/// @param filter_id - Value ID for the filter tensor. The filter tensor must be a 4D tensor defined in the @a subgraph
///                    with [groups * group_output_channels, kernel_height, kernel_width, group_input_channels]
///                    dimensions.
/// @param bias_id - Value ID for the bias tensor. The bias tensor must be a 1D tensor defined in the @a subgraph with
///                  [groups * group_output_channels] dimensions.
/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
///                    with [N, OH, OW, groups * group_output_channels] dimensions.
/// @param flags - binary features of the 2D Convolution Node. The only currently supported value is
///                XNN_FLAG_TENSORFLOW_SAME_PADDING.
enum xnn_status xnn_define_convolution_2d(
  xnn_subgraph_t subgraph,
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t kernel_height,
  uint32_t kernel_width,
  uint32_t subsampling_height,
  uint32_t subsampling_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  uint32_t groups,
  size_t group_input_channels,
  size_t group_output_channels,
  float output_min,
  float output_max,
  uint32_t input_id,
  uint32_t filter_id,
  uint32_t bias_id,
  uint32_t output_id,
  uint32_t flags);
246 
/// Define a 2D Depthwise Convolution Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param input_padding_top - implicit zero-padding above 2D input data.
/// @param input_padding_right - implicit zero-padding to the right of 2D input data.
/// @param input_padding_bottom - implicit zero-padding below 2D input data.
/// @param input_padding_left - implicit zero-padding to the left of 2D input data.
/// @param kernel_height - kernel (filter) height.
/// @param kernel_width - kernel (filter) width.
/// @param subsampling_height - height of subsampling region for convolution output (convolution height stride).
/// @param subsampling_width - width of subsampling region for convolution output (convolution width stride).
/// @param dilation_height - dilation of kernel elements along the height dimension.
/// @param dilation_width - dilation of kernel elements along the width dimension.
/// @param depth_multiplier - ratio of output channels to input channels.
/// @param input_channels - number of input channels.
/// @param output_min - lower bound for clipping output values.
/// @param output_max - upper bound for clipping output values.
/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
///                   with [N, IH, IW, input_channels] dimensions.
/// @param filter_id - Value ID for the filter tensor. The filter tensor must be a 4D tensor defined in the @a subgraph
///                    with [1, kernel_height, kernel_width, input_channels * depth_multiplier] dimensions.
/// @param bias_id - Value ID for the bias tensor. The bias tensor must be a 1D tensor defined in the @a subgraph with
///                  [input_channels * depth_multiplier] dimensions.
/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
///                    with [N, OH, OW, input_channels * depth_multiplier] dimensions.
/// @param flags - binary features of the 2D Depthwise Convolution Node. The only currently supported value is
///                XNN_FLAG_TENSORFLOW_SAME_PADDING.
enum xnn_status xnn_define_depthwise_convolution_2d(
  xnn_subgraph_t subgraph,
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t kernel_height,
  uint32_t kernel_width,
  uint32_t subsampling_height,
  uint32_t subsampling_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  uint32_t depth_multiplier,
  size_t input_channels,
  float output_min,
  float output_max,
  uint32_t input_id,
  uint32_t filter_id,
  uint32_t bias_id,
  uint32_t output_id,
  uint32_t flags);
295 
/// Define a 2-Input Add Node and add it to a Subgraph.
///
/// The 2-Input Add Node computes elementwise addition of two tensor inputs with numpy broadcasting rules.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param output_min - lower bound for clipping output values.
/// @param output_max - upper bound for clipping output values.
/// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in
///                    the @a subgraph with each dimension either equal to the corresponding dimension of the second
///                    input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
///                    that dimension.
/// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in
///                    the @a subgraph with each dimension either equal to the corresponding dimension of the first
///                    input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
///                    that dimension.
/// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined
///                    in the @a subgraph with each dimension equal to the maximum between the corresponding dimension
///                    of the two inputs.
/// @param flags - binary features of the Add Node. No supported flags are currently defined.
enum xnn_status xnn_define_add2(
  xnn_subgraph_t subgraph,
  float output_min,
  float output_max,
  uint32_t input1_id,
  uint32_t input2_id,
  uint32_t output_id,
  uint32_t flags);
323 
/// Define a 2-Input Multiply Node and add it to a Subgraph.
///
/// The 2-Input Multiply Node computes elementwise multiplication of two tensor inputs with numpy broadcasting rules.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param output_min - lower bound for clipping output values.
/// @param output_max - upper bound for clipping output values.
/// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in
///                    the @a subgraph with each dimension either equal to the corresponding dimension of the second
///                    input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
///                    that dimension.
/// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in
///                    the @a subgraph with each dimension either equal to the corresponding dimension of the first
///                    input, or equal to 1. In the latter case, the elements of the input tensor are broadcasted along
///                    that dimension.
/// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined
///                    in the @a subgraph with each dimension equal to the maximum between the corresponding dimension
///                    of the two inputs.
/// @param flags - binary features of the Multiply Node. No supported flags are currently defined.
enum xnn_status xnn_define_multiply2(
  xnn_subgraph_t subgraph,
  float output_min,
  float output_max,
  uint32_t input1_id,
  uint32_t input2_id,
  uint32_t output_id,
  uint32_t flags);
351 
/// Define a PReLU (Parametric ReLU) Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
///                   with [N, H, W, channels] dimensions.
/// @param slope_id - Value ID for the slope tensor. The slope tensor must be a 1D tensor defined in the @a subgraph
///                   with [channels] dimensions.
/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
///                    with [N, H, W, channels] dimensions.
/// @param flags - binary features of the PReLU Node. No supported flags are currently defined.
enum xnn_status xnn_define_prelu(
  xnn_subgraph_t subgraph,
  uint32_t input_id,
  uint32_t slope_id,
  uint32_t output_id,
  uint32_t flags);
368 
/// Define a Clamp Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param output_min - lower bound for clipping output values.
/// @param output_max - upper bound for clipping output values.
/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
///                    shape must match the shape of the input tensor.
/// @param flags - binary features of the Clamp Node. No supported flags are currently defined.
enum xnn_status xnn_define_clamp(
  xnn_subgraph_t subgraph,
  float output_min,
  float output_max,
  uint32_t input_id,
  uint32_t output_id,
  uint32_t flags);

/// Define a HardSwish Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
///                    shape must match the shape of the input tensor.
/// @param flags - binary features of the HardSwish Node. No supported flags are currently defined.
enum xnn_status xnn_define_hardswish(
  xnn_subgraph_t subgraph,
  uint32_t input_id,
  uint32_t output_id,
  uint32_t flags);

/// Define a Sigmoid Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
///                    shape must match the shape of the input tensor.
/// @param flags - binary features of the Sigmoid Node. No supported flags are currently defined.
enum xnn_status xnn_define_sigmoid(
  xnn_subgraph_t subgraph,
  uint32_t input_id,
  uint32_t output_id,
  uint32_t flags);

/// Define a SoftMax Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph, and have at
///                   least one dimension.
/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
///                    shape must match the shape of the input tensor.
/// @param flags - binary features of the SoftMax Node. No supported flags are currently defined.
enum xnn_status xnn_define_softmax(
  xnn_subgraph_t subgraph,
  uint32_t input_id,
  uint32_t output_id,
  uint32_t flags);
425 
/// Runtime is a combination of an execution plan for subgraph Nodes and a memory manager for subgraph Values.
typedef struct xnn_runtime* xnn_runtime_t;

/// Create a Runtime object from a subgraph.
///
/// @param subgraph - a Subgraph object with all Values and Nodes that would be handled by the runtime. No Values or
///                   Nodes can be added to the runtime once it is constructed.
/// @param threadpool - the thread pool to be used for parallelisation of computations in the runtime. If the thread
///                     pool is NULL, the computation would run on the caller thread without parallelization.
/// @param flags - binary features of the subgraph. No supported flags are currently defined.
/// @param runtime_out - pointer to the variable that will be initialized with a handle to the Runtime object upon
///                      successful return. Once constructed, the Runtime object is independent of the Subgraph object
///                      used to create it.
enum xnn_status xnn_create_runtime_v2(
  xnn_subgraph_t subgraph,
  pthreadpool_t threadpool,
  uint32_t flags,
  xnn_runtime_t* runtime_out);

/// Create a Runtime object from a subgraph.
/// NOTE(review): presumably equivalent to xnn_create_runtime_v2 with a NULL thread pool and no flags —
/// confirm against the implementation.
enum xnn_status xnn_create_runtime(
  xnn_subgraph_t subgraph,
  xnn_runtime_t* runtime_out);
448 
/// Binding of an external Value ID to a caller-provided memory location (see xnn_setup_runtime).
struct xnn_external_value {
  /// ID of an external input or output Value in the runtime.
  uint32_t id;
  /// Pointer to the data backing the Value.
  void* data;
};

/// Setup data pointers for external inputs and outputs in a Runtime object.
///
/// @param runtime - a Runtime object created with @ref xnn_create_runtime or @ref xnn_create_runtime_v2.
/// @param num_external_values - the number of external inputs and outputs specified in this call. This number must
///                              match the number of external inputs and outputs in the runtime, i.e. all external
///                              inputs and outputs in the runtime must be specified in one call.
/// @param external_values - array with location information for all external inputs and outputs in the runtime.
enum xnn_status xnn_setup_runtime(
  xnn_runtime_t runtime,
  size_t num_external_values,
  const struct xnn_external_value* external_values);

/// Execute forward pass for all operators in the runtime.
///
/// @param runtime - the Runtime object with the execution plan to invoke.
enum xnn_status xnn_invoke_runtime(
  xnn_runtime_t runtime);

/// Destroy a Runtime object, as well as operators and memory associated with it.
///
/// @param runtime - the Runtime object to destroy.
enum xnn_status xnn_delete_runtime(
  xnn_runtime_t runtime);
477 
/// Operator is an opaque handle to a single configured XNNPACK operator.
typedef struct xnn_operator* xnn_operator_t;

/// Run an operator.
///
/// @param op - the operator to run.
/// @param threadpool - the thread pool to use for parallelisation of the computation.
///                     NOTE(review): presumably NULL runs on the caller thread, as with runtimes — confirm.
enum xnn_status xnn_run_operator(
  xnn_operator_t op,
  pthreadpool_t threadpool);

/// Destroy an operator and release resources associated with it.
///
/// @param op - the operator to destroy.
enum xnn_status xnn_delete_operator(
  xnn_operator_t op);
486 
#ifndef XNN_NO_F32_OPERATORS

/// F32 operator API. Naming convention: xnn_create_* constructs a reusable operator object, xnn_setup_* binds
/// shapes and data pointers for a batch, and the operator is executed with xnn_run_operator.

/// Create an elementwise Add operator for F32 data with NC layout (per the _nc_ suffix).
enum xnn_status xnn_create_add_nc_f32(
  size_t channels,
  size_t a_stride,
  size_t b_stride,
  size_t sum_stride,
  float sum_min,
  float sum_max,
  uint32_t flags,
  xnn_operator_t* add_op_out);

/// Setup an NC Add operator with the batch size and input/output data pointers.
enum xnn_status xnn_setup_add_nc_f32(
  xnn_operator_t add_op,
  size_t batch_size,
  const float* a,
  const float* b,
  float* sum,
  pthreadpool_t threadpool);

/// Create an N-dimensional Add operator for F32 data; input shapes are supplied at setup time.
enum xnn_status xnn_create_add_nd_f32(
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* add_op_out);

/// Setup an ND Add operator with two input shapes and data pointers.
enum xnn_status xnn_setup_add_nd_f32(
  xnn_operator_t add_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const float* input1,
  const float* input2,
  float* output,
  pthreadpool_t threadpool);
523 
/// Create a 2D ArgMax Pooling operator for F32 data with NHWC layout (per the _nhwc_ suffix).
enum xnn_status xnn_create_argmax_pooling2d_nhwc_f32(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t pooling_height,
  uint32_t pooling_width,
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* argmax_pooling_op_out);

/// Setup a 2D ArgMax Pooling operator. Writes pooled values to @a output and the index of each maximum
/// within its pooling window to @a index.
enum xnn_status xnn_setup_argmax_pooling2d_nhwc_f32(
  xnn_operator_t argmax_pooling_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const float* input,
  float* output,
  uint32_t* index,
  pthreadpool_t threadpool);

/// Create a 2D Average Pooling operator for F32 data with NHWC layout.
enum xnn_status xnn_create_average_pooling2d_nhwc_f32(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t pooling_height,
  uint32_t pooling_width,
  uint32_t stride_height,
  uint32_t stride_width,
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* average_pooling_op_out);

/// Setup a 2D Average Pooling operator with the batch size, input dimensions, and data pointers.
enum xnn_status xnn_setup_average_pooling2d_nhwc_f32(
  xnn_operator_t average_pooling_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const float* input,
  float* output,
  pthreadpool_t threadpool);
574 
/// Create a Clamp operator for F32 data with NC layout: clips each element to [output_min, output_max].
enum xnn_status xnn_create_clamp_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* clamp_op_out);

/// Setup a Clamp operator with the batch size and input/output data pointers.
enum xnn_status xnn_setup_clamp_nc_f32(
  xnn_operator_t clamp_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create a 2D Convolution operator for F32 data with NHWC layout.
/// Parameters mirror xnn_define_convolution_2d, with kernel/bias data passed directly instead of via Value IDs.
enum xnn_status xnn_create_convolution2d_nhwc_f32(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t kernel_height,
  uint32_t kernel_width,
  uint32_t subsampling_height,
  uint32_t subsampling_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  uint32_t groups,
  size_t group_input_channels,
  size_t group_output_channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  const float* kernel,
  const float* bias,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* convolution_op_out);

/// Setup a 2D Convolution operator with the batch size, input dimensions, and data pointers.
enum xnn_status xnn_setup_convolution2d_nhwc_f32(
  xnn_operator_t convolution_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const float* input,
  float* output,
  pthreadpool_t threadpool);
622 
/// Create a 2D Deconvolution (transposed convolution) operator for F32 data with NHWC layout.
enum xnn_status xnn_create_deconvolution2d_nhwc_f32(
  uint32_t output_padding_top,
  uint32_t output_padding_right,
  uint32_t output_padding_bottom,
  uint32_t output_padding_left,
  uint32_t kernel_height,
  uint32_t kernel_width,
  uint32_t stride_height,
  uint32_t stride_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  uint32_t groups,
  size_t group_input_channels,
  size_t group_output_channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  const float* kernel,
  const float* bias,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* deconvolution_op_out);

/// Setup a 2D Deconvolution operator with the batch size, input dimensions, output-size adjustments,
/// and data pointers.
enum xnn_status xnn_setup_deconvolution2d_nhwc_f32(
  xnn_operator_t deconvolution_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  uint32_t adjustment_height,
  uint32_t adjustment_width,
  const float* input,
  float* output,
  pthreadpool_t threadpool);

/// Create an N-dimensional Divide operator for F32 data; input shapes are supplied at setup time.
enum xnn_status xnn_create_divide_nd_f32(
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* divide_op_out);

/// Setup an ND Divide operator with two input shapes and data pointers.
enum xnn_status xnn_setup_divide_nd_f32(
  xnn_operator_t divide_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const float* input1,
  const float* input2,
  float* output,
  pthreadpool_t threadpool);
673 
/// Creates a Fully Connected operator for F32 data in NC layout.
/// Pass XNN_FLAG_TRANSPOSE_WEIGHTS in flags to assume transposed weights.
enum xnn_status xnn_create_fully_connected_nc_f32(
  size_t input_channels,
  size_t output_channels,
  size_t input_stride,
  size_t output_stride,
  const float* kernel,
  const float* bias,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* fully_connected_op_out);

/// Binds input/output buffers and batch size to a previously created
/// F32 Fully Connected operator.
enum xnn_status xnn_setup_fully_connected_nc_f32(
  xnn_operator_t fully_connected_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);
692 
/// Creates a Global Average Pooling operator for F32 data in NWC layout.
enum xnn_status xnn_create_global_average_pooling_nwc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* global_average_pooling_op_out);

/// Binds input/output buffers, batch size, and input width to a previously
/// created F32 NWC Global Average Pooling operator.
enum xnn_status xnn_setup_global_average_pooling_nwc_f32(
  xnn_operator_t global_average_pooling_op,
  size_t batch_size,
  size_t width,
  const float* input,
  float* output,
  pthreadpool_t threadpool);
709 
/// Creates a HardSwish activation operator for F32 data in NC layout.
enum xnn_status xnn_create_hardswish_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* hardswish_op_out);

/// Binds input/output buffers and batch size to a previously created
/// F32 HardSwish operator.
enum xnn_status xnn_setup_hardswish_nc_f32(
  xnn_operator_t hardswish_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);
723 
/// Creates a 2D Max Pooling operator for F32 data in NHWC layout.
/// XNN_FLAG_TENSORFLOW_SAME_PADDING in flags selects dynamically computed "SAME" padding.
enum xnn_status xnn_create_max_pooling2d_nhwc_f32(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t pooling_height,
  uint32_t pooling_width,
  uint32_t stride_height,
  uint32_t stride_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* max_pooling_op_out);

/// Binds input/output tensors and batch/spatial dimensions to a previously
/// created F32 NHWC 2D Max Pooling operator.
enum xnn_status xnn_setup_max_pooling2d_nhwc_f32(
  xnn_operator_t max_pooling_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const float* input,
  float* output,
  pthreadpool_t threadpool);
751 
/// Creates an N-dimensional elementwise Maximum operator for F32 data.
/// Note: unlike the arithmetic ND operators, no output clamping parameters exist.
enum xnn_status xnn_create_maximum_nd_f32(
  uint32_t flags,
  xnn_operator_t* maximum_op_out);

/// Binds the two input tensors (with their shapes) and the output tensor to a
/// previously created F32 Maximum operator.
enum xnn_status xnn_setup_maximum_nd_f32(
  xnn_operator_t maximum_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const float* input1,
  const float* input2,
  float* output,
  pthreadpool_t threadpool);
766 
/// Creates an N-dimensional elementwise Minimum operator for F32 data.
enum xnn_status xnn_create_minimum_nd_f32(
  uint32_t flags,
  xnn_operator_t* minimum_op_out);

/// Binds the two input tensors (with their shapes) and the output tensor to a
/// previously created F32 Minimum operator.
enum xnn_status xnn_setup_minimum_nd_f32(
  xnn_operator_t minimum_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const float* input1,
  const float* input2,
  float* output,
  pthreadpool_t threadpool);
781 
/// Creates an N-dimensional elementwise Multiply operator for F32 data,
/// with output clamped to [output_min, output_max].
enum xnn_status xnn_create_multiply_nd_f32(
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* multiply_op_out);

/// Binds the two input tensors (with their shapes) and the output tensor to a
/// previously created F32 Multiply operator.
enum xnn_status xnn_setup_multiply_nd_f32(
  xnn_operator_t multiply_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const float* input1,
  const float* input2,
  float* output,
  pthreadpool_t threadpool);
798 
/// Creates a PReLU (parametric ReLU) operator for F32 data in NC layout.
/// @param negative_slope - per-channel slope values; presumably one per channel — confirm count.
enum xnn_status xnn_create_prelu_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  const float* negative_slope,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* prelu_op_out);

/// Binds input/output buffers and batch size to a previously created
/// F32 PReLU operator.
enum xnn_status xnn_setup_prelu_nc_f32(
  xnn_operator_t prelu_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);
815 
/// Creates a 2D Bilinear Resize operator for F32 data in NHWC layout.
/// XNN_FLAG_ALIGN_CORNERS in flags aligns input and output image corners.
enum xnn_status xnn_create_resize_bilinear2d_nhwc_f32(
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  uint32_t flags,
  xnn_operator_t* resize_op_out);

/// Binds input/output tensors plus both input and target output spatial sizes to
/// a previously created F32 NHWC Bilinear Resize operator.
enum xnn_status xnn_setup_resize_bilinear2d_nhwc_f32(
  xnn_operator_t resize_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  size_t output_height,
  size_t output_width,
  const float* input,
  float* output,
  pthreadpool_t threadpool);
833 
/// Creates a Sigmoid activation operator for F32 data in NC layout.
enum xnn_status xnn_create_sigmoid_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* sigmoid_op_out);

/// Binds input/output buffers and batch size to a previously created
/// F32 Sigmoid operator.
enum xnn_status xnn_setup_sigmoid_nc_f32(
  xnn_operator_t sigmoid_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);
847 
/// Creates a SoftMax operator for F32 data in NC layout; presumably the softmax
/// is computed over the channel dimension — confirm against XNNPACK docs.
enum xnn_status xnn_create_softmax_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* softmax_op_out);

/// Binds input/output buffers and batch size to a previously created
/// F32 SoftMax operator.
enum xnn_status xnn_setup_softmax_nc_f32(
  xnn_operator_t softmax_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool);
861 
/// Creates an N-dimensional elementwise Subtract operator for F32 data,
/// with output clamped to [output_min, output_max].
enum xnn_status xnn_create_subtract_nd_f32(
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* subtract_op_out);

/// Binds the two input tensors (with their shapes) and the output tensor to a
/// previously created F32 Subtract operator.
enum xnn_status xnn_setup_subtract_nd_f32(
  xnn_operator_t subtract_op,
  size_t num_input1_dims,
  const size_t* input1_shape,
  size_t num_input2_dims,
  const size_t* input2_shape,
  const float* input1,
  const float* input2,
  float* output,
  pthreadpool_t threadpool);
878 
879 #ifndef XNN_NO_NCHW_OPERATORS
880 
/// Creates a 2D Convolution operator for F32 data in NCHW layout.
/// Unlike the NHWC variant, this takes no pixel strides; per-batch strides are
/// supplied at setup time instead. XNN_FLAG_INPUT_NHWC in flags makes the
/// operator assume NHWC input regardless of output layout.
enum xnn_status xnn_create_convolution2d_nchw_f32(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t kernel_height,
  uint32_t kernel_width,
  uint32_t subsampling_height,
  uint32_t subsampling_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  uint32_t groups,
  size_t group_input_channels,
  size_t group_output_channels,
  const float* kernel,
  const float* bias,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* convolution_op_out);

/// Binds input/output tensors, batch/spatial dimensions, and per-batch element
/// strides to a previously created F32 NCHW 2D Convolution operator.
enum xnn_status xnn_setup_convolution2d_nchw_f32(
  xnn_operator_t convolution_op,
  size_t batch_size,
  size_t input_batch_stride,
  size_t output_batch_stride,
  size_t input_height,
  size_t input_width,
  const float* input,
  float* output,
  pthreadpool_t threadpool);
912 
/// Creates a Global Average Pooling operator for F32 data in NCW layout.
/// Note: no stride parameters, unlike the NWC variant.
enum xnn_status xnn_create_global_average_pooling_ncw_f32(
  size_t channels,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* global_average_pooling_op_out);

/// Binds input/output buffers, batch size, and input width to a previously
/// created F32 NCW Global Average Pooling operator.
enum xnn_status xnn_setup_global_average_pooling_ncw_f32(
  xnn_operator_t global_average_pooling_op,
  size_t batch_size,
  size_t width,
  const float* input,
  float* output,
  pthreadpool_t threadpool);
927 
928 #endif  // XNN_NO_NCHW_OPERATORS
929 
930 #endif  // XNN_NO_F32_OPERATORS
931 
932 #ifndef XNN_NO_X32_OPERATORS
933 
/// Creates a Channel Pad operator for 32-bit elements in NC layout, padding the
/// channel dimension before and after with *pad_value (type-agnostic 32-bit value).
enum xnn_status xnn_create_channel_pad_nc_x32(
  size_t input_channels,
  size_t pad_before_channels,
  size_t pad_after_channels,
  size_t input_stride,
  size_t output_stride,
  const void* pad_value,
  uint32_t flags,
  xnn_operator_t* channel_pad_op_out);

/// Binds input/output buffers and batch size to a previously created
/// X32 Channel Pad operator.
enum xnn_status xnn_setup_channel_pad_nc_x32(
  xnn_operator_t channel_pad_op,
  size_t batch_size,
  const void* input,
  void* output,
  pthreadpool_t threadpool);
950 
/// Creates a Channel Shuffle operator for 32-bit elements in NC layout, with
/// channels organized as groups x group_channels.
enum xnn_status xnn_create_channel_shuffle_nc_x32(
  size_t groups,
  size_t group_channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* channel_shuffle_op_out);

/// Binds input/output buffers and batch size to a previously created
/// X32 Channel Shuffle operator.
enum xnn_status xnn_setup_channel_shuffle_nc_x32(
  xnn_operator_t channel_shuffle_op,
  size_t batch_size,
  const void* input,
  void* output,
  pthreadpool_t threadpool);
965 
/// Creates a 2D Unpooling operator for 32-bit elements in NHWC layout.
enum xnn_status xnn_create_unpooling2d_nhwc_x32(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t pooling_height,
  uint32_t pooling_width,
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  uint32_t flags,
  xnn_operator_t* unpooling_op_out);

/// Binds input/output tensors and batch/spatial dimensions to a previously
/// created X32 2D Unpooling operator.
/// @param index - per-element indices; presumably the argmax positions produced
///                by a matching max-pooling pass — confirm against XNNPACK docs.
enum xnn_status xnn_setup_unpooling2d_nhwc_x32(
  xnn_operator_t unpooling_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const void* input,
  const uint32_t* index,
  void* output,
  pthreadpool_t threadpool);
988 
989 #endif  // XNN_NO_X32_OPERATORS
990 
991 #ifndef XNN_NO_Q8_OPERATORS
992 
/// Creates an elementwise Add operator for quantized 8-bit (Q8) data in NC layout.
/// Each of the two inputs and the sum carries its own zero point and scale;
/// the sum is clamped to [sum_min, sum_max] in the quantized domain.
enum xnn_status xnn_create_add_nc_q8(
  size_t channels,
  size_t a_stride,
  size_t b_stride,
  size_t sum_stride,
  uint8_t a_zero_point,
  float a_scale,
  uint8_t b_zero_point,
  float b_scale,
  uint8_t sum_zero_point,
  float sum_scale,
  uint8_t sum_min,
  uint8_t sum_max,
  uint32_t flags,
  xnn_operator_t* add_op_out);

/// Binds the two addend buffers, the sum buffer, and batch size to a previously
/// created Q8 Add operator.
enum xnn_status xnn_setup_add_nc_q8(
  xnn_operator_t add_op,
  size_t batch_size,
  const uint8_t* a,
  const uint8_t* b,
  uint8_t* sum,
  pthreadpool_t threadpool);
1016 
/// Creates a 2D Average Pooling operator for Q8 data in NHWC layout, with
/// separate input and output quantization parameters. Note: no dilation
/// parameters, unlike max pooling.
enum xnn_status xnn_create_average_pooling2d_nhwc_q8(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t pooling_height,
  uint32_t pooling_width,
  uint32_t stride_height,
  uint32_t stride_width,
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  uint8_t input_zero_point,
  float input_scale,
  uint8_t output_zero_point,
  float output_scale,
  uint8_t output_min,
  uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* average_pooling_op_out);

/// Binds input/output tensors and batch/spatial dimensions to a previously
/// created Q8 NHWC 2D Average Pooling operator.
enum xnn_status xnn_setup_average_pooling2d_nhwc_q8(
  xnn_operator_t average_pooling_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);
1046 
/// Creates a 2D Convolution operator for Q8 data in NHWC layout.
/// Input, kernel, and output each carry a zero point and scale; bias is int32.
/// XNN_FLAG_DEPTHWISE_CONVOLUTION in flags selects depthwise mode (HWGo filters).
enum xnn_status xnn_create_convolution2d_nhwc_q8(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t kernel_height,
  uint32_t kernel_width,
  uint32_t subsampling_height,
  uint32_t subsampling_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  uint32_t groups,
  size_t group_input_channels,
  size_t group_output_channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  uint8_t input_zero_point,
  float input_scale,
  uint8_t kernel_zero_point,
  float kernel_scale,
  const uint8_t* kernel,
  const int32_t* bias,
  uint8_t output_zero_point,
  float output_scale,
  uint8_t output_min,
  uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* convolution_op_out);

/// Binds input/output tensors and batch/spatial dimensions to a previously
/// created Q8 NHWC 2D Convolution operator.
enum xnn_status xnn_setup_convolution2d_nhwc_q8(
  xnn_operator_t convolution_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);
1084 
/// Creates a 2D Deconvolution (transposed convolution) operator for Q8 data in
/// NHWC layout. Input, kernel, and output each carry a zero point and scale.
enum xnn_status xnn_create_deconvolution2d_nhwc_q8(
  uint32_t output_padding_top,
  uint32_t output_padding_right,
  uint32_t output_padding_bottom,
  uint32_t output_padding_left,
  uint32_t kernel_height,
  uint32_t kernel_width,
  uint32_t stride_height,
  uint32_t stride_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  uint32_t groups,
  size_t group_input_channels,
  size_t group_output_channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  uint8_t input_zero_point,
  float input_scale,
  uint8_t kernel_zero_point,
  float kernel_scale,
  const uint8_t* kernel,
  const int32_t* bias,
  uint8_t output_zero_point,
  float output_scale,
  uint8_t output_min,
  uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* deconvolution_op_out);

/// Binds input/output tensors, batch/spatial dimensions, and per-call output
/// size adjustments to a previously created Q8 NHWC 2D Deconvolution operator.
enum xnn_status xnn_setup_deconvolution2d_nhwc_q8(
  xnn_operator_t deconvolution_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  uint32_t adjustment_height,
  uint32_t adjustment_width,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);
1124 
/// Creates a Fully Connected operator for Q8 data in NC layout.
/// Input, kernel, and output each carry a zero point and scale; bias is int32.
enum xnn_status xnn_create_fully_connected_nc_q8(
  size_t input_channels,
  size_t output_channels,
  size_t input_stride,
  size_t output_stride,
  uint8_t input_zero_point,
  float input_scale,
  uint8_t kernel_zero_point,
  float kernel_scale,
  const uint8_t* kernel,
  const int32_t* bias,
  uint8_t output_zero_point,
  float output_scale,
  uint8_t output_min,
  uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* fully_connected_op_out);

/// Binds input/output buffers and batch size to a previously created
/// Q8 Fully Connected operator.
enum xnn_status xnn_setup_fully_connected_nc_q8(
  xnn_operator_t fully_connected_op,
  size_t batch_size,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);
1149 
/// Creates a Global Average Pooling operator for Q8 data in NWC layout,
/// with separate input and output quantization parameters.
enum xnn_status xnn_create_global_average_pooling_nwc_q8(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint8_t input_zero_point,
  float input_scale,
  uint8_t output_zero_point,
  float output_scale,
  uint8_t output_min,
  uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* global_average_pooling_op_out);

/// Binds input/output buffers, batch size, and input width to a previously
/// created Q8 NWC Global Average Pooling operator.
enum xnn_status xnn_setup_global_average_pooling_nwc_q8(
  xnn_operator_t global_average_pooling_op,
  size_t batch_size,
  size_t width,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);
1170 
/// Creates a Leaky ReLU operator for Q8 data in NC layout.
/// @param negative_slope - multiplier applied (in real-valued terms) to negative inputs.
enum xnn_status xnn_create_leaky_relu_nc_q8(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  float negative_slope,
  uint8_t input_zero_point,
  float input_scale,
  uint8_t output_zero_point,
  float output_scale,
  uint8_t output_min,
  uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* leaky_relu_op_out);

/// Binds input/output buffers and batch size to a previously created
/// Q8 Leaky ReLU operator.
enum xnn_status xnn_setup_leaky_relu_nc_q8(
  xnn_operator_t leaky_relu_op,
  size_t batch_size,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);
1191 
/// Creates a Sigmoid activation operator for Q8 data in NC layout,
/// with separate input and output quantization parameters.
enum xnn_status xnn_create_sigmoid_nc_q8(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint8_t input_zero_point,
  float input_scale,
  uint8_t output_zero_point,
  float output_scale,
  uint8_t output_min,
  uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* sigmoid_op_out);

/// Binds input/output buffers and batch size to a previously created
/// Q8 Sigmoid operator.
enum xnn_status xnn_setup_sigmoid_nc_q8(
  xnn_operator_t sigmoid_op,
  size_t batch_size,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);
1211 
/// Creates a SoftMax operator for Q8 data in NC layout. Note: the output side
/// takes only a zero point and scale — no min/max clamp and no input zero point,
/// unlike the other Q8 operators in this header.
enum xnn_status xnn_create_softmax_nc_q8(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  float input_scale,
  uint8_t output_zero_point,
  float output_scale,
  uint32_t flags,
  xnn_operator_t* softmax_op_out);

/// Binds input/output buffers and batch size to a previously created
/// Q8 SoftMax operator.
enum xnn_status xnn_setup_softmax_nc_q8(
  xnn_operator_t softmax_op,
  size_t batch_size,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);
1228 
1229 #endif  // XNN_NO_Q8_OPERATORS
1230 
1231 #ifndef XNN_NO_U8_OPERATORS
1232 
/// Creates a Clamp operator for U8 data in NC layout, restricting each element
/// to [output_min, output_max].
enum xnn_status xnn_create_clamp_nc_u8(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint8_t output_min,
  uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* clamp_op_out);

/// Binds input/output buffers and batch size to a previously created
/// U8 Clamp operator.
enum xnn_status xnn_setup_clamp_nc_u8(
  xnn_operator_t clamp_op,
  size_t batch_size,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);
1248 
/// Creates a 2D Max Pooling operator for U8 data in NHWC layout. No quantization
/// parameters are needed: max pooling is order-preserving on raw U8 values.
enum xnn_status xnn_create_max_pooling2d_nhwc_u8(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t pooling_height,
  uint32_t pooling_width,
  uint32_t stride_height,
  uint32_t stride_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  uint8_t output_min,
  uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* max_pooling_op_out);

/// Binds input/output tensors and batch/spatial dimensions to a previously
/// created U8 NHWC 2D Max Pooling operator.
enum xnn_status xnn_setup_max_pooling2d_nhwc_u8(
  xnn_operator_t max_pooling_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool);
1276 
1277 #endif  // XNN_NO_U8_OPERATORS
1278 
1279 #ifndef XNN_NO_X8_OPERATORS
1280 
/// Creates a Channel Shuffle operator for 8-bit elements in NC layout, with
/// channels organized as groups x group_channels.
enum xnn_status xnn_create_channel_shuffle_nc_x8(
  size_t groups,
  size_t group_channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* channel_shuffle_op_out);

/// Binds input/output buffers and batch size to a previously created
/// X8 Channel Shuffle operator.
enum xnn_status xnn_setup_channel_shuffle_nc_x8(
  xnn_operator_t channel_shuffle_op,
  size_t batch_size,
  const void* input,
  void* output,
  pthreadpool_t threadpool);
1295 
1296 #endif  // XNN_NO_X8_OPERATORS
1297 
1298 #ifdef __cplusplus
1299 }  // extern "C"
1300 #endif
1301