// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#include <xnnpack.h>
#include <xnnpack/common.h>
#include <xnnpack/microfnptr.h>
#include <xnnpack/microparams.h>


21 struct xnn_hmp_gemm_ukernel {
22   xnn_gemm_ukernel_function function[XNN_MAX_UARCH_TYPES];
23 #if XNN_PLATFORM_JIT
24   size_t generated_code_offset[XNN_MAX_UARCH_TYPES];
25 #endif  // XNN_PLATFORM_JIT
26 };
27 
// Builds a GEMM micro-kernel table in which every micro-architecture slot points
// at the same `function`.
//
// On JIT-enabled builds every generated-code offset - including the one for
// slot 0 - is set to SIZE_MAX, so all slots start out in the same state.
// NOTE(review): SIZE_MAX is presumably the "no generated code" marker; confirm
// against the code that consumes generated_code_offset.
static inline struct xnn_hmp_gemm_ukernel xnn_init_hmp_gemm_ukernel(xnn_gemm_ukernel_function function) {
  struct xnn_hmp_gemm_ukernel ukernel = {{ function }};
#if XNN_PLATFORM_JIT
  // The aggregate initializer above zero-fills generated_code_offset[0]; give it
  // the same SIZE_MAX value the loop below stores into the remaining slots.
  ukernel.generated_code_offset[0] = SIZE_MAX;
#endif  // XNN_PLATFORM_JIT
  for (size_t i = 1; i < XNN_MAX_UARCH_TYPES; i++) {
    ukernel.function[i] = function;
#if XNN_PLATFORM_JIT
    ukernel.generated_code_offset[i] = SIZE_MAX;
#endif  // XNN_PLATFORM_JIT
  }
  return ukernel;
}

xnn_is_hmp_gemm_ukernel(struct xnn_hmp_gemm_ukernel ukernel)39 static inline bool xnn_is_hmp_gemm_ukernel(struct xnn_hmp_gemm_ukernel ukernel) {
40 #if XNN_MAX_UARCH_TYPES == 1
41   return false;
42 #else
43   uintptr_t default_function = (uintptr_t) ukernel.function[XNN_UARCH_DEFAULT];
44   uintptr_t difference = 0;
45   for (size_t i = 1; i < XNN_MAX_UARCH_TYPES; i++) {
46     difference |= (default_function ^ (uintptr_t) ukernel.function[i]);
47   }
48   return difference != 0;
49 #endif
50 }
51 
52 struct xnn_hmp_igemm_ukernel {
53   xnn_igemm_ukernel_function function[XNN_MAX_UARCH_TYPES];
54 #if XNN_PLATFORM_JIT
55   size_t generated_code_offset[XNN_MAX_UARCH_TYPES];
56 #endif  // XNN_PLATFORM_JIT
57 };
58 
// Builds an IGEMM micro-kernel table in which every micro-architecture slot
// points at the same `function`.
//
// On JIT-enabled builds every generated-code offset - including the one for
// slot 0 - is set to SIZE_MAX, so all slots start out in the same state.
// NOTE(review): SIZE_MAX is presumably the "no generated code" marker; confirm
// against the code that consumes generated_code_offset.
static inline struct xnn_hmp_igemm_ukernel xnn_init_hmp_igemm_ukernel(xnn_igemm_ukernel_function function) {
  struct xnn_hmp_igemm_ukernel ukernel = {{ function }};
#if XNN_PLATFORM_JIT
  // The aggregate initializer above zero-fills generated_code_offset[0]; give it
  // the same SIZE_MAX value the loop below stores into the remaining slots.
  ukernel.generated_code_offset[0] = SIZE_MAX;
#endif  // XNN_PLATFORM_JIT
  for (size_t i = 1; i < XNN_MAX_UARCH_TYPES; i++) {
    ukernel.function[i] = function;
#if XNN_PLATFORM_JIT
    ukernel.generated_code_offset[i] = SIZE_MAX;
#endif  // XNN_PLATFORM_JIT
  }
  return ukernel;
}

xnn_is_hmp_igemm_ukernel(struct xnn_hmp_igemm_ukernel ukernel)70 static inline bool xnn_is_hmp_igemm_ukernel(struct xnn_hmp_igemm_ukernel ukernel) {
71 #if XNN_MAX_UARCH_TYPES == 1
72   return false;
73 #else
74   uintptr_t default_function = (uintptr_t) ukernel.function[XNN_UARCH_DEFAULT];
75   uintptr_t difference = 0;
76   for (size_t i = 1; i < XNN_MAX_UARCH_TYPES; i++) {
77     difference |= (default_function ^ (uintptr_t) ukernel.function[i]);
78   }
79   return difference != 0;
80 #endif
81 }
82 
// Upper bound on the GEMM/IGEMM MR value; used to size per-MR micro-kernel tables.
// Largest GEMM/IGEMM MR used in init.c is 7 (x86 AVX512).
// Largest GEMM/IGEMM MR is 8 in e2e benchmarks.
#define XNN_MAX_MR 8

87 struct gemm_fused_ukernels {
88   struct xnn_hmp_gemm_ukernel gemm[XNN_MAX_MR];
89   struct xnn_hmp_igemm_ukernel igemm[XNN_MAX_MR];
90 };
91 
92 struct transpose_parameters {
93   union {
94     xnn_transposec_ukernel_function const_size_ukernel;
95     xnn_transposev_ukernel_function variable_size_ukernel;
96   };
97   // Maximum number of elements to process per ukernel call.
98   uint8_t tile_size;
99 };
100 
#if XNN_PLATFORM_JIT
// Table of JIT GEMM code generators with one slot per micro-architecture type.
struct xnn_hmp_gemm_codegen {
  xnn_jit_gemm_code_generator_function function[XNN_MAX_UARCH_TYPES];
};

// Builds a GEMM code-generator table in which every micro-architecture slot
// points at the same `function`.
static inline struct xnn_hmp_gemm_codegen xnn_init_hmp_gemm_codegen(xnn_jit_gemm_code_generator_function function) {
  struct xnn_hmp_gemm_codegen codegen = {{ function }};
  for (size_t i = 1; i < XNN_MAX_UARCH_TYPES; i++) {
    codegen.function[i] = function;
  }
  return codegen;
}

// Returns true when at least one micro-architecture slot (index 1 and up) holds a
// generator different from the one in the XNN_UARCH_DEFAULT slot.
// Always returns false when only a single micro-architecture type is compiled in.
static inline bool xnn_is_hmp_gemm_codegen(struct xnn_hmp_gemm_codegen ukernel) {
#if XNN_MAX_UARCH_TYPES == 1
  return false;
#else
  const xnn_jit_gemm_code_generator_function default_function = ukernel.function[XNN_UARCH_DEFAULT];
  for (size_t i = 1; i < XNN_MAX_UARCH_TYPES; i++) {
    // Function pointers are compared directly; no pointer-to-integer cast is needed.
    if (ukernel.function[i] != default_function) {
      return true;
    }
  }
  return false;
#endif
}

// Table of JIT IGEMM code generators with one slot per micro-architecture type.
struct xnn_hmp_igemm_codegen {
  xnn_jit_igemm_code_generator_function function[XNN_MAX_UARCH_TYPES];
};

// Builds an IGEMM code-generator table in which every micro-architecture slot
// points at the same `function`.
static inline struct xnn_hmp_igemm_codegen xnn_init_hmp_igemm_codegen(xnn_jit_igemm_code_generator_function function) {
  struct xnn_hmp_igemm_codegen codegen = {{ function }};
  for (size_t i = 1; i < XNN_MAX_UARCH_TYPES; i++) {
    codegen.function[i] = function;
  }
  return codegen;
}

// Returns true when at least one micro-architecture slot (index 1 and up) holds a
// generator different from the one in the XNN_UARCH_DEFAULT slot.
// Always returns false when only a single micro-architecture type is compiled in.
static inline bool xnn_is_hmp_igemm_codegen(struct xnn_hmp_igemm_codegen ukernel) {
#if XNN_MAX_UARCH_TYPES == 1
  return false;
#else
  const xnn_jit_igemm_code_generator_function default_function = ukernel.function[XNN_UARCH_DEFAULT];
  for (size_t i = 1; i < XNN_MAX_UARCH_TYPES; i++) {
    // Function pointers are compared directly; no pointer-to-integer cast is needed.
    if (ukernel.function[i] != default_function) {
      return true;
    }
  }
  return false;
#endif
}

// JIT code generators associated with a set of GEMM parameters.
struct gemm_codegens {
  struct xnn_hmp_gemm_codegen gemm;
  struct xnn_hmp_igemm_codegen igemm;
  // Optional JIT GEMM and IGEMM micro-kernels with MR=1 and the same NR and KR parameters.
  struct xnn_hmp_gemm_codegen gemm1;
  struct xnn_hmp_igemm_codegen igemm1;
};
#endif  // XNN_PLATFORM_JIT

161 struct gemm_parameters {
162   struct gemm_fused_ukernels minmax;
163   struct gemm_fused_ukernels relu;
164   struct gemm_fused_ukernels linear;
165 #if XNN_PLATFORM_JIT
166   struct gemm_codegens generator;
167 #endif  // XNN_PLATFORM_JIT
168   union {
169     xnn_init_qc8_conv_minmax_params_fn qc8;
170     xnn_init_qs8_conv_minmax_params_fn qs8;
171     xnn_init_qu8_conv_minmax_params_fn qu8;
172     xnn_init_f16_minmax_params_fn f16;
173     xnn_init_f32_minmax_params_fn f32;
174   } init;
175   uint8_t mr;
176   uint8_t nr;
177   uint8_t log2_kr;
178   uint8_t log2_sr;
179 };
180 
181 struct vunary_parameters {
182   xnn_vunary_ukernel_function ukernel;
183   union {
184     xnn_init_f16_f32_cvt_params_fn f16_f32_cvt;
185     xnn_init_f16_abs_params_fn f16_abs;
186     xnn_init_f16_elu_params_fn f16_elu;
187     xnn_init_f16_hswish_params_fn f16_hswish;
188     xnn_init_f16_lrelu_params_fn f16_lrelu;
189     xnn_init_f16_neg_params_fn f16_neg;
190     xnn_init_f16_minmax_params_fn f16_minmax;
191     xnn_init_f16_sigmoid_params_fn f16_sigmoid;
192     xnn_init_f16_sqrt_params_fn f16_sqrt;
193     xnn_init_f32_abs_params_fn f32_abs;
194     xnn_init_f32_default_params_fn f32_default;
195     xnn_init_f32_elu_params_fn f32_elu;
196     xnn_init_f32_f16_cvt_params_fn f32_f16_cvt;
197     xnn_init_f32_hswish_params_fn f32_hswish;
198     xnn_init_f32_lrelu_params_fn f32_lrelu;
199     xnn_init_f32_minmax_params_fn f32_minmax;
200     xnn_init_f32_neg_params_fn f32_neg;
201     xnn_init_f32_qs8_cvt_params_fn f32_qs8_cvt;
202     xnn_init_f32_qu8_cvt_params_fn f32_qu8_cvt;
203     xnn_init_f32_rnd_params_fn f32_rnd;
204     xnn_init_f32_sigmoid_params_fn f32_sigmoid;
205     xnn_init_f32_sqrt_params_fn f32_sqrt;
206     xnn_init_qs8_cvt_params_fn qs8_cvt;
207     xnn_init_qs8_f32_cvt_params_fn qs8_f32_cvt;
208     xnn_init_qs8_lrelu_params_fn qs8_lrelu;
209     xnn_init_qu8_cvt_params_fn qu8_cvt;
210     xnn_init_qu8_f32_cvt_params_fn qu8_f32_cvt;
211     xnn_init_qu8_lrelu_params_fn qu8_lrelu;
212     xnn_init_s8_minmax_params_fn s8_minmax;
213     xnn_init_u8_minmax_params_fn u8_minmax;
214   } init;
215   // Number of elements in a tile.
216   // For best efficiency, micro-kernel must process a multiple of this number of elements in each call.
217   uint8_t element_tile;
218 };
219 
220 struct vbinary_fused_ukernels {
221   xnn_vbinary_ukernel_function op_ukernel;
222   xnn_vbinary_ukernel_function opc_ukernel;
223   xnn_vbinary_ukernel_function ropc_ukernel;
224 };
225 
226 struct vbinary_parameters {
227   struct vbinary_fused_ukernels minmax;
228   struct vbinary_fused_ukernels linear;
229   union {
230     xnn_init_f16_minmax_params_fn f16_minmax;
231     xnn_init_f32_default_params_fn f32_default;
232     xnn_init_f32_minmax_params_fn f32_minmax;
233     xnn_init_qs8_add_minmax_params_fn qs8_add;
234     xnn_init_qs8_mul_minmax_params_fn qs8_mul;
235     xnn_init_qu8_add_minmax_params_fn qu8_add;
236     xnn_init_qu8_mul_minmax_params_fn qu8_mul;
237   } init;
238   // Number of elements in a tile.
239   // For best efficiency, micro-kernel must process a multiple of this number of elements in each call.
240   uint8_t element_tile;
241 };
242 
243 struct spmm_parameters {
244   xnn_spmm_ukernel_function ukernel;
245   // Number of M-dimension elements in a tile.
246   // Corresponds to a block of pixels in 1x1 Convolution and a block of batch size in Fully Connected operator.
247   uint8_t mr;
248   // Number of N-dimension elements in a tile.
249   // Corresponds to a block of output channels/features in 1x1 Convolution and Fully Connected operator.
250   uint8_t nr;
251 };
252 
253 struct conv_hwc2chw_parameters {
254   xnn_conv_hwc2chw_ukernel_function ukernel_with_symm_padding;
255   // Number of output channels in a tile.
256   // This parameter must be passed as is to weight packing function.
257   uint8_t output_channel_tile;
258   // Number of output height pixels in a tile.
259   // For best efficiency, micro-kernel must produce a multiple of this number of rows in each call.
260   uint8_t output_height_tile;
261   // Number of output width pixels in a tile.
262   uint8_t output_width_tile;
263 };
264 
265 struct dwconv2d_chw_parameters {
266   xnn_dwconv2d_chw_ukernel_function ukernel;
267   // Number of output width pixels in a tile.
268   uint8_t output_width_tile;
269   // Number of output height pixels in a tile.
270   // For best efficiency, micro-kernel must produce a multiple of this number of rows in each call.
271   uint8_t output_height_tile;
272 };
273 
274 struct gavgpool_cw_parameters {
275   xnn_gavgpool_cw_ukernel_function ukernel;
276   // Number of channels in a tile.
277   // For best efficiency, micro-kernel must process a multiple of this number of channels in each call.
278   uint8_t channel_tile;
279 };
280 
281 union dwconv_fused_ukernels {
282   xnn_dwconv_unipass_ukernel_function unipass;
283   xnn_dwconv_multipass_ukernel_function multipass;
284 };
285 
286 struct dwconv_parameters {
287   union dwconv_fused_ukernels minmax;
288   union dwconv_fused_ukernels linear;
289   union {
290     xnn_init_qc8_conv_minmax_params_fn qc8;
291     xnn_init_qs8_conv_minmax_params_fn qs8;
292     xnn_init_qu8_conv_minmax_params_fn qu8;
293     xnn_init_f16_minmax_params_fn f16;
294     xnn_init_f32_minmax_params_fn f32;
295   } init;
296   uint8_t channel_tile;
297   uint8_t primary_tile;
298   uint8_t incremental_tile;
299 };
300 
301 struct gavgpool_parameters {
302   xnn_gavgpool_unipass_ukernel_function unipass;
303   xnn_gavgpool_multipass_ukernel_function multipass;
304   union {
305     xnn_init_f16_scaleminmax_params_fn f16;
306     xnn_init_f32_scaleminmax_params_fn f32;
307     xnn_init_qs8_avgpool_minmax_params_fn qs8;
308     xnn_init_qu8_avgpool_minmax_params_fn qu8;
309   } init;
310   union {
311     xnn_update_f16_scaleminmax_params_fn f16;
312     xnn_update_f32_scaleminmax_params_fn f32;
313     xnn_update_qs8_avgpool_minmax_params_fn qs8;
314     xnn_update_qu8_avgpool_minmax_params_fn qu8;
315   } update;
316   // Number of rows in a tile.
317   // For best efficiency, micro-kernel must produce a multiple of this number of rows in each call.
318   uint16_t row_tile;
319   // Number of channels in a tile.
320   // For best efficiency, micro-kernel must process a multiple of this number of channels in each call.
321   uint16_t channel_tile;
322 };
323 
324 struct avgpool_parameters {
325   xnn_avgpool_unipass_ukernel_function unipass;
326   xnn_avgpool_multipass_ukernel_function multipass;
327   union {
328     xnn_init_f16_scaleminmax_params_fn f16;
329     xnn_init_f32_scaleminmax_params_fn f32;
330     xnn_init_qu8_avgpool_minmax_params_fn qu8;
331   } init;
332   // Number of rows in a primary tile.
333   // Unipass micro-kernel must be called with this number of rows, or fewer.
334   // Multipass micro-kernel must be called with more than this number of rows.
335   uint8_t primary_tile;
336   // Number of rows in an incremental tile.
337   // For best efficiency, multipass micro-kernel must process the number of rows in the primary tile plus a multiple
338   // of this number of rows in each call. This number has no meaning for the unipass micro-kernel.
339   uint8_t incremental_tile;
340   // Number of channels in a tile.
341   // For best efficiency, micro-kernel must process a multiple of this number of channels in each call.
342   uint16_t channel_tile;
343 };
344 
345 struct pavgpool_parameters {
346   xnn_pavgpool_unipass_ukernel_function unipass;
347   xnn_pavgpool_multipass_ukernel_function multipass;
348   union {
349     xnn_init_f16_minmax_params_fn f16;
350     xnn_init_f32_minmax_params_fn f32;
351   } init;
352   // Number of rows in a primary tile.
353   // Unipass micro-kernel must be called with this number of rows, or fewer.
354   // Multipass micro-kernel must be called with more than this number of rows.
355   uint8_t primary_tile;
356   // Number of rows in an incremental tile.
357   // For best efficiency, multipass micro-kernel must process the number of rows in the primary tile plus a multiple
358   // of this number of rows in each call. This number has no meaning for the unipass micro-kernel.
359   uint8_t incremental_tile;
360   // Number of channels in a tile.
361   // For best efficiency, micro-kernel must process a multiple of this number of channels in each call.
362   uint16_t channel_tile;
363 };
364 
365 struct argmaxpool_parameters {
366   union {
367     xnn_argmaxpool_unipass_ukernel_function up;
368     xnn_argmaxpool_multipass_ukernel_function mp;
369   };
370   uint8_t mr;
371   uint8_t qr;
372 };
373 
374 struct maxpool_parameters {
375   xnn_maxpool_ukernel_function ukernel;
376   union {
377     xnn_init_s8_minmax_params_fn s8;
378     xnn_init_u8_minmax_params_fn u8;
379     xnn_init_f32_minmax_params_fn f32;
380     xnn_init_f16_minmax_params_fn f16;
381   } init;
382   uint8_t mr;
383   uint8_t qr;
384 };
385 
386 struct ibilinear_parameters {
387   xnn_ibilinear_ukernel_function ukernel;
388   // Number of output pixels in a tile.
389   // For best efficiency, micro-kernel must produce a multiple of this number of pixels in each call.
390   uint8_t pixel_tile;
391   // Number of channels in a tile.
392   // For best efficiency, micro-kernel must process a multiple of this number of channels in each call.
393   uint8_t channel_tile;
394 };
395 
396 struct ibilinear_chw_parameters {
397   xnn_ibilinear_chw_ukernel_function ukernel;
398   // Number of output pixels in a tile.
399   // For best efficiency, micro-kernel must produce a multiple of this number of pixels in each call.
400   uint8_t pixel_tile;
401   // Number of channels in a tile.
402   // For best efficiency, micro-kernel must process a multiple of this number of channels in each call.
403   uint8_t channel_tile;
404 };
405 
406 struct zip_parameters {
407   xnn_zipc_ukernel_function x2;
408   xnn_zipc_ukernel_function x3;
409   xnn_zipc_ukernel_function x4;
410   xnn_zipv_ukernel_function xm;
411 };
412 
413 struct prelu_parameters {
414   xnn_prelu_ukernel_function ukernel;
415   uint16_t row_tile;
416   uint16_t channel_tile;
417 };
418 
419 struct raddstoreexpminusmax_parameters {
420   xnn_raddstoreexpminusmax_ukernel_function ukernel;
421   union {
422     xnn_init_f16_expminus_params_fn f16;
423     xnn_init_f32_expminus_params_fn f32;
424   } init;
425   // Number of elements in a tile.
426   // For best efficiency, micro-kernel must process a multiple of this number of elements in each call.
427   uint8_t element_tile;
428 };
429 
430 struct fill_parameters {
431   xnn_fill_ukernel_function ukernel;
432   // Number of rows of inputs processed in one tile.
433   // For best efficiency, micro-kernel must produce a multiple of this number of rows in each call.
434   uint8_t row_tile;
435 };
436 
437 struct pad_parameters {
438   xnn_pad_ukernel_function ukernel;
439   // Number of rows of inputs processed in one tile.
440   // For best efficiency, micro-kernel must produce a multiple of this number of rows in each call.
441   uint8_t row_tile;
442 };
443 
444 struct vmulcaddc_parameters {
445   xnn_vmulcaddc_ukernel_function ukernel;
446   union {
447     xnn_init_f16_minmax_params_fn f16;
448     xnn_init_f32_minmax_params_fn f32;
449   } init;
450   uint8_t channel_tile;
451   uint8_t row_tile;
452 };
453 
// Capacities of the per-data-type micro-kernel arrays in struct xnn_parameters.
#define XNN_MAX_QC8_DWCONV_UKERNELS 3
#define XNN_MAX_QS8_DWCONV_UKERNELS 2
#define XNN_MAX_QU8_DWCONV_UKERNELS 2
#define XNN_MAX_F16_DWCONV_UKERNELS 4
#define XNN_MAX_F32_DWCONV_UKERNELS 4
#define XNN_MAX_F32_ARGMAXPOOL_UKERNELS 3

// Bit flags stored in xnn_parameters.init_flags. Each flag occupies its own bit,
// so flags can be combined with bitwise OR and tested with bitwise AND.

// Indicates that XNNPACK as a whole has initialized.
// This does not guarantee that any particular microkernels are available.
#define XNN_INIT_FLAG_XNNPACK    0x00000001
// Indicates that F32 XNNPACK microkernels are available for use.
#define XNN_INIT_FLAG_F32        0x00000002
// Indicates that X32 XNNPACK microkernels are available for use.
#define XNN_INIT_FLAG_X32        0x00000004
// Indicates that F16 XNNPACK microkernels are available for use.
#define XNN_INIT_FLAG_F16        0x00000008
// Indicates that F16 XNNPACK microkernels are natively supported by the hardware.
#define XNN_INIT_FLAG_F16_NATIVE 0x00000010
// Indicates that X16 XNNPACK microkernels are available for use.
#define XNN_INIT_FLAG_X16        0x00000020
// Indicates that QC8 XNNPACK microkernels are available for use.
#define XNN_INIT_FLAG_QC8        0x00000040
// Indicates that QS8 XNNPACK microkernels are available for use.
#define XNN_INIT_FLAG_QS8        0x00000080
// Indicates that QU8 XNNPACK microkernels are available for use.
#define XNN_INIT_FLAG_QU8        0x00000100
// Indicates that S8 XNNPACK microkernels are available for use.
#define XNN_INIT_FLAG_S8         0x00000200
// Indicates that U8 XNNPACK microkernels are available for use.
#define XNN_INIT_FLAG_U8         0x00000400
// Indicates that X8 XNNPACK microkernels are available for use.
#define XNN_INIT_FLAG_X8         0x00000800
// Indicates that XX XNNPACK microkernels are available for use.
#define XNN_INIT_FLAG_XX         0x00001000
// Indicates that VCVT XNNPACK microkernels are available for use.
#define XNN_INIT_FLAG_VCVT       0x00002000
// Indicates that CHW XNNPACK microkernels are optimized for the host platform.
#define XNN_INIT_FLAG_CHW_OPT    0x00004000

493 struct xnn_parameters {
494   // Bitwise combination of XNN_INIT_FLAG_* flags
495   uint32_t init_flags;
496   struct xnn_allocator allocator;
497   size_t page_size;
498   struct {
499     struct gemm_parameters gemm;
500     struct dwconv_parameters dwconv[XNN_MAX_QC8_DWCONV_UKERNELS];
501   } qc8;
502   struct {
503     struct gemm_parameters gemm;
504     struct dwconv_parameters dwconv[XNN_MAX_QS8_DWCONV_UKERNELS];
505     struct gavgpool_parameters gavgpool;
506     struct vbinary_parameters vadd;
507     struct vbinary_parameters vmul;
508     struct vunary_parameters lrelu;
509   } qs8;
510   struct {
511     struct gemm_parameters gemm;
512     struct dwconv_parameters dwconv[XNN_MAX_QU8_DWCONV_UKERNELS];
513     struct avgpool_parameters avgpool;
514     struct gavgpool_parameters gavgpool;
515     struct vbinary_parameters vadd;
516     struct vbinary_parameters vmul;
517     struct vunary_parameters lrelu;
518   } qu8;
519   struct {
520     struct vunary_parameters clamp;
521     // Bilinear interpolation (2D).
522     struct ibilinear_parameters ibilinear;
523     struct maxpool_parameters maxpool;
524   } s8;
525   struct {
526     struct vunary_parameters clamp;
527     // Bilinear interpolation (2D).
528     struct ibilinear_parameters ibilinear;
529     struct maxpool_parameters maxpool;
530     xnn_u8_lut32norm_ukernel_function lut32norm;
531     xnn_u8_rmax_ukernel_function rmax;
532   } u8;
533   struct {
534     xnn_x8_lut_ukernel_function lut;
535     struct zip_parameters zip;
536     struct transpose_parameters transpose;
537   } x8;
538   struct {
539     struct transpose_parameters transpose;
540   } x16;
541   struct {
542     struct gemm_parameters gemm;
543     struct gemm_parameters gemm2;
544     struct dwconv_parameters dwconv[XNN_MAX_F16_DWCONV_UKERNELS];
545     struct avgpool_parameters avgpool;
546     struct pavgpool_parameters pavgpool;
547     struct gavgpool_parameters gavgpool;
548     struct maxpool_parameters maxpool;
549     // Bilinear interpolation (2D).
550     struct ibilinear_parameters ibilinear;
551     struct vunary_parameters abs;
552     struct vunary_parameters clamp;
553     struct vunary_parameters elu;
554     struct vunary_parameters hswish;
555     struct vunary_parameters lrelu;
556     struct vunary_parameters neg;
557     struct vunary_parameters rndne;
558     struct vunary_parameters rndz;
559     struct vunary_parameters rndu;
560     struct vunary_parameters rndd;
561     struct vunary_parameters sigmoid;
562     struct vunary_parameters sqr;
563     struct vunary_parameters sqrt;
564     struct prelu_parameters prelu;
565     struct vbinary_parameters vadd;
566     struct vbinary_parameters vdiv;
567     struct vbinary_parameters vmax;
568     struct vbinary_parameters vmin;
569     struct vbinary_parameters vmul;
570     struct vbinary_parameters vsub;
571     struct vbinary_parameters vsqrdiff;
572     struct vmulcaddc_parameters vmulcaddc;
573     struct raddstoreexpminusmax_parameters raddstoreexpminusmax;
574     xnn_rmax_ukernel_function rmax;
575     // Sparse Matrix-Dense Matrix Multiplication (NR=1 block).
576     struct spmm_parameters spmm;
577     // Direct 3x3 stride-2 Convolution with 3 input channels and HWC->CHW layout conversion.
578     struct conv_hwc2chw_parameters conv_hwc2chw_3x3c3s2;
579     // Direct 3x3 stride-1 Convolution with padding 1 on left and right in CHW layout.
580     struct dwconv2d_chw_parameters dwconv2d_chw_3x3;
581     // Direct 3x3 stride-2 Convolution with padding 1 on left and right in CHW layout.
582     struct dwconv2d_chw_parameters dwconv2d_chw_3x3s2;
583     // Direct 5x5 stride-1 Convolution with padding 2 on left and right in CHW layout.
584     struct dwconv2d_chw_parameters dwconv2d_chw_5x5;
585     // Direct 5x5 stride-2 Convolution with padding 2 on left and right in CHW layout.
586     struct dwconv2d_chw_parameters dwconv2d_chw_5x5s2;
587     // Global Average Pooling in CW layout.
588     struct gavgpool_cw_parameters gavgpool_cw;
589     // Bilinear interpolation (2D) in CHW layout.
590     struct ibilinear_chw_parameters ibilinear_chw;
591   } f16;
592   struct {
593     struct gemm_parameters gemm;
594     struct gemm_parameters gemm2;
595     struct dwconv_parameters dwconv[XNN_MAX_F32_DWCONV_UKERNELS];
596     struct avgpool_parameters avgpool;
597     struct pavgpool_parameters pavgpool;
598     struct gavgpool_parameters gavgpool;
599     struct maxpool_parameters maxpool;
600     struct argmaxpool_parameters argmaxpool[XNN_MAX_F32_ARGMAXPOOL_UKERNELS];
601     // Bilinear interpolation (2D).
602     struct ibilinear_parameters ibilinear;
603     struct vunary_parameters abs;
604     struct vunary_parameters clamp;
605     struct vunary_parameters elu;
606     struct vunary_parameters hswish;
607     struct vunary_parameters lrelu;
608     struct vunary_parameters neg;
609     struct vunary_parameters relu;
610     struct vunary_parameters rndne;
611     struct vunary_parameters rndz;
612     struct vunary_parameters rndu;
613     struct vunary_parameters rndd;
614     struct vunary_parameters sigmoid;
615     struct vunary_parameters sqr;
616     struct vunary_parameters sqrt;
617     struct prelu_parameters prelu;
618     struct vbinary_parameters vadd;
619     struct vbinary_parameters vdiv;
620     struct vbinary_parameters vmax;
621     struct vbinary_parameters vmin;
622     struct vbinary_parameters vmul;
623     struct vbinary_parameters vsub;
624     struct vbinary_parameters vsqrdiff;
625     struct vmulcaddc_parameters vmulcaddc;
626     struct raddstoreexpminusmax_parameters raddstoreexpminusmax;
627     xnn_rmax_ukernel_function rmax;
628     // Sparse Matrix-Dense Matrix Multiplication (NR=1 block).
629     struct spmm_parameters spmm;
630     // Sparse Matrix-Dense Matrix Multiplication (NR=2 block).
631     struct spmm_parameters spmm2;
632     // Sparse Matrix-Dense Matrix Multiplication (NR=4 block).
633     struct spmm_parameters spmm4;
634     // Direct 3x3 stride-2 Convolution with 3 input channels and HWC->CHW layout conversion.
635     struct conv_hwc2chw_parameters conv_hwc2chw_3x3c3s2;
636     // Direct 3x3 stride-1 Convolution with padding 1 on left and right in CHW layout.
637     struct dwconv2d_chw_parameters dwconv2d_chw_3x3;
638     // Direct 3x3 stride-2 Convolution with padding 1 on left and right in CHW layout.
639     struct dwconv2d_chw_parameters dwconv2d_chw_3x3s2;
640     // Direct 5x5 stride-1 Convolution with padding 2 on left and right in CHW layout.
641     struct dwconv2d_chw_parameters dwconv2d_chw_5x5;
642     // Direct 5x5 stride-2 Convolution with padding 2 on left and right in CHW layout.
643     struct dwconv2d_chw_parameters dwconv2d_chw_5x5s2;
644     // Global Average Pooling in CW layout.
645     struct gavgpool_cw_parameters gavgpool_cw;
646     // Bilinear interpolation (2D) in CHW layout.
647     struct ibilinear_chw_parameters ibilinear_chw;
648   } f32;
649   struct {
650     struct vunary_parameters f16_to_f32;
651     struct vunary_parameters f32_to_f16;
652     struct vunary_parameters f32_to_qs8;
653     struct vunary_parameters f32_to_qu8;
654     struct vunary_parameters qs8;
655     struct vunary_parameters qs8_to_f32;
656     struct vunary_parameters qu8;
657     struct vunary_parameters qu8_to_f32;
658   } vcvt;
659   struct {
660     xnn_unpool_ukernel_function unpool;
661     struct zip_parameters zip;
662     struct transpose_parameters transpose;
663   } x32;
664   struct {
665     xnn_vunary_ukernel_function copy;
666     struct fill_parameters fill;
667     struct pad_parameters pad;
668     struct transpose_parameters transpose;
669   } xx;
670 };
671 
672 #ifdef __cplusplus
673 extern "C" XNN_INTERNAL struct xnn_parameters xnn_params;
674 #else
675 extern XNN_INTERNAL struct xnn_parameters xnn_params;
676 #endif
677