1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8
9 #pragma once
10
11 #include <stdbool.h>
12 #include <stddef.h>
13 #include <stdint.h>
14
15 #include <xnnpack.h>
16 #include <xnnpack/common.h>
17 #include <xnnpack/microfnptr.h>
18 #include <xnnpack/microparams.h>
19
20
21 struct xnn_hmp_gemm_ukernel {
22 xnn_gemm_ukernel_function function[XNN_MAX_UARCH_TYPES];
23 #if XNN_PLATFORM_JIT
24 size_t generated_code_offset[XNN_MAX_UARCH_TYPES];
25 #endif // XNN_PLATFORM_JIT
26 };
27
// Creates a GEMM micro-kernel table in which every micro-architecture slot
// uses the same micro-kernel.
//
// function: micro-kernel stored in all XNN_MAX_UARCH_TYPES slots.
// Returns the populated table by value.
//
// On JIT platforms every generated_code_offset slot, including slot 0, is set
// to SIZE_MAX.
static inline struct xnn_hmp_gemm_ukernel xnn_init_hmp_gemm_ukernel(xnn_gemm_ukernel_function function) {
  // The aggregate initializer fills function[0] and zero-fills everything else.
  struct xnn_hmp_gemm_ukernel ukernel = {{ function }};
#if XNN_PLATFORM_JIT
  // Without this store slot 0 would keep offset 0 while all other slots hold
  // SIZE_MAX, so the default slot would not carry the same marker value.
  ukernel.generated_code_offset[0] = SIZE_MAX;
#endif  // XNN_PLATFORM_JIT
  for (size_t i = 1; i < XNN_MAX_UARCH_TYPES; i++) {
    ukernel.function[i] = function;
#if XNN_PLATFORM_JIT
    ukernel.generated_code_offset[i] = SIZE_MAX;
#endif  // XNN_PLATFORM_JIT
  }
  return ukernel;
}
38
xnn_is_hmp_gemm_ukernel(struct xnn_hmp_gemm_ukernel ukernel)39 static inline bool xnn_is_hmp_gemm_ukernel(struct xnn_hmp_gemm_ukernel ukernel) {
40 #if XNN_MAX_UARCH_TYPES == 1
41 return false;
42 #else
43 uintptr_t default_function = (uintptr_t) ukernel.function[XNN_UARCH_DEFAULT];
44 uintptr_t difference = 0;
45 for (size_t i = 1; i < XNN_MAX_UARCH_TYPES; i++) {
46 difference |= (default_function ^ (uintptr_t) ukernel.function[i]);
47 }
48 return difference != 0;
49 #endif
50 }
51
52 struct xnn_hmp_igemm_ukernel {
53 xnn_igemm_ukernel_function function[XNN_MAX_UARCH_TYPES];
54 #if XNN_PLATFORM_JIT
55 size_t generated_code_offset[XNN_MAX_UARCH_TYPES];
56 #endif // XNN_PLATFORM_JIT
57 };
58
// Creates an IGEMM micro-kernel table in which every micro-architecture slot
// uses the same micro-kernel.
//
// function: micro-kernel stored in all XNN_MAX_UARCH_TYPES slots.
// Returns the populated table by value.
//
// On JIT platforms every generated_code_offset slot, including slot 0, is set
// to SIZE_MAX.
static inline struct xnn_hmp_igemm_ukernel xnn_init_hmp_igemm_ukernel(xnn_igemm_ukernel_function function) {
  // The aggregate initializer fills function[0] and zero-fills everything else.
  struct xnn_hmp_igemm_ukernel ukernel = {{ function }};
#if XNN_PLATFORM_JIT
  // Without this store slot 0 would keep offset 0 while all other slots hold
  // SIZE_MAX, so the default slot would not carry the same marker value.
  ukernel.generated_code_offset[0] = SIZE_MAX;
#endif  // XNN_PLATFORM_JIT
  for (size_t i = 1; i < XNN_MAX_UARCH_TYPES; i++) {
    ukernel.function[i] = function;
#if XNN_PLATFORM_JIT
    ukernel.generated_code_offset[i] = SIZE_MAX;
#endif  // XNN_PLATFORM_JIT
  }
  return ukernel;
}
69
xnn_is_hmp_igemm_ukernel(struct xnn_hmp_igemm_ukernel ukernel)70 static inline bool xnn_is_hmp_igemm_ukernel(struct xnn_hmp_igemm_ukernel ukernel) {
71 #if XNN_MAX_UARCH_TYPES == 1
72 return false;
73 #else
74 uintptr_t default_function = (uintptr_t) ukernel.function[XNN_UARCH_DEFAULT];
75 uintptr_t difference = 0;
76 for (size_t i = 1; i < XNN_MAX_UARCH_TYPES; i++) {
77 difference |= (default_function ^ (uintptr_t) ukernel.function[i]);
78 }
79 return difference != 0;
80 #endif
81 }
82
// Upper bound on the GEMM/IGEMM MR value.
// init.c uses MR of at most 7 (x86 AVX512); the end-to-end benchmarks go up to MR = 8.
#define XNN_MAX_MR 8
86
87 struct gemm_fused_ukernels {
88 struct xnn_hmp_gemm_ukernel gemm[XNN_MAX_MR];
89 struct xnn_hmp_igemm_ukernel igemm[XNN_MAX_MR];
90 };
91
92 struct transpose_parameters {
93 union {
94 xnn_transposec_ukernel_function const_size_ukernel;
95 xnn_transposev_ukernel_function variable_size_ukernel;
96 };
97 // Maximum number of elements to process per ukernel call.
98 uint8_t tile_size;
99 };
100
101 #if XNN_PLATFORM_JIT
102 struct xnn_hmp_gemm_codegen {
103 xnn_jit_gemm_code_generator_function function[XNN_MAX_UARCH_TYPES];
104 };
105
// Creates a JIT GEMM code-generator table in which every micro-architecture
// slot uses the same generator `function`. The table is returned by value.
static inline struct xnn_hmp_gemm_codegen xnn_init_hmp_gemm_codegen(xnn_jit_gemm_code_generator_function function) {
  // Slot 0 is set by the initializer; the loop below fills the remaining slots.
  struct xnn_hmp_gemm_codegen table = {{ function }};
  size_t uarch = 1;
  while (uarch < XNN_MAX_UARCH_TYPES) {
    table.function[uarch] = function;
    uarch += 1;
  }
  return table;
}
113
xnn_is_hmp_gemm_codegen(struct xnn_hmp_gemm_codegen ukernel)114 static inline bool xnn_is_hmp_gemm_codegen(struct xnn_hmp_gemm_codegen ukernel) {
115 #if XNN_MAX_UARCH_TYPES == 1
116 return false;
117 #else
118 uintptr_t default_function = (uintptr_t) ukernel.function[XNN_UARCH_DEFAULT];
119 uintptr_t difference = 0;
120 for (size_t i = 1; i < XNN_MAX_UARCH_TYPES; i++) {
121 difference |= (default_function ^ (uintptr_t) ukernel.function[i]);
122 }
123 return difference != 0;
124 #endif
125 }
126
127 struct xnn_hmp_igemm_codegen {
128 xnn_jit_igemm_code_generator_function function[XNN_MAX_UARCH_TYPES];
129 };
130
// Creates a JIT IGEMM code-generator table in which every micro-architecture
// slot uses the same generator `function`. The table is returned by value.
static inline struct xnn_hmp_igemm_codegen xnn_init_hmp_igemm_codegen(xnn_jit_igemm_code_generator_function function) {
  // Slot 0 is set by the initializer; the loop below fills the remaining slots.
  struct xnn_hmp_igemm_codegen table = {{ function }};
  size_t uarch = 1;
  while (uarch < XNN_MAX_UARCH_TYPES) {
    table.function[uarch] = function;
    uarch += 1;
  }
  return table;
}
138
xnn_is_hmp_igemm_codegen(struct xnn_hmp_igemm_codegen ukernel)139 static inline bool xnn_is_hmp_igemm_codegen(struct xnn_hmp_igemm_codegen ukernel) {
140 #if XNN_MAX_UARCH_TYPES == 1
141 return false;
142 #else
143 uintptr_t default_function = (uintptr_t) ukernel.function[XNN_UARCH_DEFAULT];
144 uintptr_t difference = 0;
145 for (size_t i = 1; i < XNN_MAX_UARCH_TYPES; i++) {
146 difference |= (default_function ^ (uintptr_t) ukernel.function[i]);
147 }
148 return difference != 0;
149 #endif
150 }
151
152 struct gemm_codegens {
153 struct xnn_hmp_gemm_codegen gemm;
154 struct xnn_hmp_igemm_codegen igemm;
155 // Optional JIT GEMM and IGEMM micro-kernels with MR=1 and the same NR and KR parameters.
156 struct xnn_hmp_gemm_codegen gemm1;
157 struct xnn_hmp_igemm_codegen igemm1;
158 };
159 #endif // XNN_PLATFORM_JIT
160
161 struct gemm_parameters {
162 struct gemm_fused_ukernels minmax;
163 struct gemm_fused_ukernels relu;
164 struct gemm_fused_ukernels linear;
165 #if XNN_PLATFORM_JIT
166 struct gemm_codegens generator;
167 #endif // XNN_PLATFORM_JIT
168 union {
169 xnn_init_qc8_conv_minmax_params_fn qc8;
170 xnn_init_qs8_conv_minmax_params_fn qs8;
171 xnn_init_qu8_conv_minmax_params_fn qu8;
172 xnn_init_f16_minmax_params_fn f16;
173 xnn_init_f32_minmax_params_fn f32;
174 } init;
175 uint8_t mr;
176 uint8_t nr;
177 uint8_t log2_kr;
178 uint8_t log2_sr;
179 };
180
181 struct vunary_parameters {
182 xnn_vunary_ukernel_function ukernel;
183 union {
184 xnn_init_f16_f32_cvt_params_fn f16_f32_cvt;
185 xnn_init_f16_abs_params_fn f16_abs;
186 xnn_init_f16_elu_params_fn f16_elu;
187 xnn_init_f16_hswish_params_fn f16_hswish;
188 xnn_init_f16_lrelu_params_fn f16_lrelu;
189 xnn_init_f16_neg_params_fn f16_neg;
190 xnn_init_f16_minmax_params_fn f16_minmax;
191 xnn_init_f16_sigmoid_params_fn f16_sigmoid;
192 xnn_init_f16_sqrt_params_fn f16_sqrt;
193 xnn_init_f32_abs_params_fn f32_abs;
194 xnn_init_f32_default_params_fn f32_default;
195 xnn_init_f32_elu_params_fn f32_elu;
196 xnn_init_f32_f16_cvt_params_fn f32_f16_cvt;
197 xnn_init_f32_hswish_params_fn f32_hswish;
198 xnn_init_f32_lrelu_params_fn f32_lrelu;
199 xnn_init_f32_minmax_params_fn f32_minmax;
200 xnn_init_f32_neg_params_fn f32_neg;
201 xnn_init_f32_qs8_cvt_params_fn f32_qs8_cvt;
202 xnn_init_f32_qu8_cvt_params_fn f32_qu8_cvt;
203 xnn_init_f32_rnd_params_fn f32_rnd;
204 xnn_init_f32_sigmoid_params_fn f32_sigmoid;
205 xnn_init_f32_sqrt_params_fn f32_sqrt;
206 xnn_init_qs8_cvt_params_fn qs8_cvt;
207 xnn_init_qs8_f32_cvt_params_fn qs8_f32_cvt;
208 xnn_init_qs8_lrelu_params_fn qs8_lrelu;
209 xnn_init_qu8_cvt_params_fn qu8_cvt;
210 xnn_init_qu8_f32_cvt_params_fn qu8_f32_cvt;
211 xnn_init_qu8_lrelu_params_fn qu8_lrelu;
212 xnn_init_s8_minmax_params_fn s8_minmax;
213 xnn_init_u8_minmax_params_fn u8_minmax;
214 } init;
215 // Number of elements in a tile.
216 // For best efficiency, micro-kernel must process a multiple of this number of elements in each call.
217 uint8_t element_tile;
218 };
219
220 struct vbinary_fused_ukernels {
221 xnn_vbinary_ukernel_function op_ukernel;
222 xnn_vbinary_ukernel_function opc_ukernel;
223 xnn_vbinary_ukernel_function ropc_ukernel;
224 };
225
226 struct vbinary_parameters {
227 struct vbinary_fused_ukernels minmax;
228 struct vbinary_fused_ukernels linear;
229 union {
230 xnn_init_f16_minmax_params_fn f16_minmax;
231 xnn_init_f32_default_params_fn f32_default;
232 xnn_init_f32_minmax_params_fn f32_minmax;
233 xnn_init_qs8_add_minmax_params_fn qs8_add;
234 xnn_init_qs8_mul_minmax_params_fn qs8_mul;
235 xnn_init_qu8_add_minmax_params_fn qu8_add;
236 xnn_init_qu8_mul_minmax_params_fn qu8_mul;
237 } init;
238 // Number of elements in a tile.
239 // For best efficiency, micro-kernel must process a multiple of this number of elements in each call.
240 uint8_t element_tile;
241 };
242
243 struct spmm_parameters {
244 xnn_spmm_ukernel_function ukernel;
245 // Number of M-dimension elements in a tile.
246 // Corresponds to a block of pixels in 1x1 Convolution and a block of batch size in Fully Connected operator.
247 uint8_t mr;
248 // Number of N-dimension elements in a tile.
249 // Corresponds to a block of output channels/features in 1x1 Convolution and Fully Connected operator.
250 uint8_t nr;
251 };
252
253 struct conv_hwc2chw_parameters {
254 xnn_conv_hwc2chw_ukernel_function ukernel_with_symm_padding;
255 // Number of output channels in a tile.
256 // This parameter must be passed as is to weight packing function.
257 uint8_t output_channel_tile;
258 // Number of output height pixels in a tile.
259 // For best efficiency, micro-kernel must produce a multiple of this number of rows in each call.
260 uint8_t output_height_tile;
261 // Number of output width pixels in a tile.
262 uint8_t output_width_tile;
263 };
264
265 struct dwconv2d_chw_parameters {
266 xnn_dwconv2d_chw_ukernel_function ukernel;
267 // Number of output width pixels in a tile.
268 uint8_t output_width_tile;
269 // Number of output height pixels in a tile.
270 // For best efficiency, micro-kernel must produce a multiple of this number of rows in each call.
271 uint8_t output_height_tile;
272 };
273
274 struct gavgpool_cw_parameters {
275 xnn_gavgpool_cw_ukernel_function ukernel;
276 // Number of channels in a tile.
277 // For best efficiency, micro-kernel must process a multiple of this number of channels in each call.
278 uint8_t channel_tile;
279 };
280
281 union dwconv_fused_ukernels {
282 xnn_dwconv_unipass_ukernel_function unipass;
283 xnn_dwconv_multipass_ukernel_function multipass;
284 };
285
286 struct dwconv_parameters {
287 union dwconv_fused_ukernels minmax;
288 union dwconv_fused_ukernels linear;
289 union {
290 xnn_init_qc8_conv_minmax_params_fn qc8;
291 xnn_init_qs8_conv_minmax_params_fn qs8;
292 xnn_init_qu8_conv_minmax_params_fn qu8;
293 xnn_init_f16_minmax_params_fn f16;
294 xnn_init_f32_minmax_params_fn f32;
295 } init;
296 uint8_t channel_tile;
297 uint8_t primary_tile;
298 uint8_t incremental_tile;
299 };
300
301 struct gavgpool_parameters {
302 xnn_gavgpool_unipass_ukernel_function unipass;
303 xnn_gavgpool_multipass_ukernel_function multipass;
304 union {
305 xnn_init_f16_scaleminmax_params_fn f16;
306 xnn_init_f32_scaleminmax_params_fn f32;
307 xnn_init_qs8_avgpool_minmax_params_fn qs8;
308 xnn_init_qu8_avgpool_minmax_params_fn qu8;
309 } init;
310 union {
311 xnn_update_f16_scaleminmax_params_fn f16;
312 xnn_update_f32_scaleminmax_params_fn f32;
313 xnn_update_qs8_avgpool_minmax_params_fn qs8;
314 xnn_update_qu8_avgpool_minmax_params_fn qu8;
315 } update;
316 // Number of rows in a tile.
317 // For best efficiency, micro-kernel must produce a multiple of this number of rows in each call.
318 uint16_t row_tile;
319 // Number of channels in a tile.
320 // For best efficiency, micro-kernel must process a multiple of this number of channels in each call.
321 uint16_t channel_tile;
322 };
323
324 struct avgpool_parameters {
325 xnn_avgpool_unipass_ukernel_function unipass;
326 xnn_avgpool_multipass_ukernel_function multipass;
327 union {
328 xnn_init_f16_scaleminmax_params_fn f16;
329 xnn_init_f32_scaleminmax_params_fn f32;
330 xnn_init_qu8_avgpool_minmax_params_fn qu8;
331 } init;
332 // Number of rows in a primary tile.
333 // Unipass micro-kernel must be called with this number of rows, or fewer.
334 // Multipass micro-kernel must be called with more than this number of rows.
335 uint8_t primary_tile;
336 // Number of rows in an incremental tile.
337 // For best efficiency, multipass micro-kernel must process the number of rows in the primary tile plus a multiple
338 // of this number of rows in each call. This number has no meaning for the unipass micro-kernel.
339 uint8_t incremental_tile;
340 // Number of channels in a tile.
341 // For best efficiency, micro-kernel must process a multiple of this number of channels in each call.
342 uint16_t channel_tile;
343 };
344
345 struct pavgpool_parameters {
346 xnn_pavgpool_unipass_ukernel_function unipass;
347 xnn_pavgpool_multipass_ukernel_function multipass;
348 union {
349 xnn_init_f16_minmax_params_fn f16;
350 xnn_init_f32_minmax_params_fn f32;
351 } init;
352 // Number of rows in a primary tile.
353 // Unipass micro-kernel must be called with this number of rows, or fewer.
354 // Multipass micro-kernel must be called with more than this number of rows.
355 uint8_t primary_tile;
356 // Number of rows in an incremental tile.
357 // For best efficiency, multipass micro-kernel must process the number of rows in the primary tile plus a multiple
358 // of this number of rows in each call. This number has no meaning for the unipass micro-kernel.
359 uint8_t incremental_tile;
360 // Number of channels in a tile.
361 // For best efficiency, micro-kernel must process a multiple of this number of channels in each call.
362 uint16_t channel_tile;
363 };
364
365 struct argmaxpool_parameters {
366 union {
367 xnn_argmaxpool_unipass_ukernel_function up;
368 xnn_argmaxpool_multipass_ukernel_function mp;
369 };
370 uint8_t mr;
371 uint8_t qr;
372 };
373
374 struct maxpool_parameters {
375 xnn_maxpool_ukernel_function ukernel;
376 union {
377 xnn_init_s8_minmax_params_fn s8;
378 xnn_init_u8_minmax_params_fn u8;
379 xnn_init_f32_minmax_params_fn f32;
380 xnn_init_f16_minmax_params_fn f16;
381 } init;
382 uint8_t mr;
383 uint8_t qr;
384 };
385
386 struct ibilinear_parameters {
387 xnn_ibilinear_ukernel_function ukernel;
388 // Number of output pixels in a tile.
389 // For best efficiency, micro-kernel must produce a multiple of this number of pixels in each call.
390 uint8_t pixel_tile;
391 // Number of channels in a tile.
392 // For best efficiency, micro-kernel must process a multiple of this number of channels in each call.
393 uint8_t channel_tile;
394 };
395
396 struct ibilinear_chw_parameters {
397 xnn_ibilinear_chw_ukernel_function ukernel;
398 // Number of output pixels in a tile.
399 // For best efficiency, micro-kernel must produce a multiple of this number of pixels in each call.
400 uint8_t pixel_tile;
401 // Number of channels in a tile.
402 // For best efficiency, micro-kernel must process a multiple of this number of channels in each call.
403 uint8_t channel_tile;
404 };
405
406 struct zip_parameters {
407 xnn_zipc_ukernel_function x2;
408 xnn_zipc_ukernel_function x3;
409 xnn_zipc_ukernel_function x4;
410 xnn_zipv_ukernel_function xm;
411 };
412
413 struct prelu_parameters {
414 xnn_prelu_ukernel_function ukernel;
415 uint16_t row_tile;
416 uint16_t channel_tile;
417 };
418
419 struct raddstoreexpminusmax_parameters {
420 xnn_raddstoreexpminusmax_ukernel_function ukernel;
421 union {
422 xnn_init_f16_expminus_params_fn f16;
423 xnn_init_f32_expminus_params_fn f32;
424 } init;
425 // Number of elements in a tile.
426 // For best efficiency, micro-kernel must process a multiple of this number of elements in each call.
427 uint8_t element_tile;
428 };
429
430 struct fill_parameters {
431 xnn_fill_ukernel_function ukernel;
432 // Number of rows of inputs processed in one tile.
433 // For best efficiency, micro-kernel must produce a multiple of this number of rows in each call.
434 uint8_t row_tile;
435 };
436
437 struct pad_parameters {
438 xnn_pad_ukernel_function ukernel;
439 // Number of rows of inputs processed in one tile.
440 // For best efficiency, micro-kernel must produce a multiple of this number of rows in each call.
441 uint8_t row_tile;
442 };
443
444 struct vmulcaddc_parameters {
445 xnn_vmulcaddc_ukernel_function ukernel;
446 union {
447 xnn_init_f16_minmax_params_fn f16;
448 xnn_init_f32_minmax_params_fn f32;
449 } init;
450 uint8_t channel_tile;
451 uint8_t row_tile;
452 };
453
// Array lengths of the per-data-type dwconv and argmaxpool parameter tables
// declared in struct xnn_parameters.
#define XNN_MAX_QC8_DWCONV_UKERNELS     3
#define XNN_MAX_QS8_DWCONV_UKERNELS     2
#define XNN_MAX_QU8_DWCONV_UKERNELS     2
#define XNN_MAX_F16_DWCONV_UKERNELS     4
#define XNN_MAX_F32_DWCONV_UKERNELS     4
#define XNN_MAX_F32_ARGMAXPOOL_UKERNELS 3
460
// Initialization status bits; struct xnn_parameters stores a bitwise
// combination of them in init_flags. Each flag occupies a distinct bit.

// XNNPACK as a whole has been initialized.
// This alone does not guarantee that any particular microkernels are available.
#define XNN_INIT_FLAG_XNNPACK    (1 << 0)
// F32 XNNPACK microkernels are available for use.
#define XNN_INIT_FLAG_F32        (1 << 1)
// X32 XNNPACK microkernels are available for use.
#define XNN_INIT_FLAG_X32        (1 << 2)
// F16 XNNPACK microkernels are available for use.
#define XNN_INIT_FLAG_F16        (1 << 3)
// F16 XNNPACK microkernels are natively supported by the hardware.
#define XNN_INIT_FLAG_F16_NATIVE (1 << 4)
// X16 XNNPACK microkernels are available for use.
#define XNN_INIT_FLAG_X16        (1 << 5)
// QC8 XNNPACK microkernels are available for use.
#define XNN_INIT_FLAG_QC8        (1 << 6)
// QS8 XNNPACK microkernels are available for use.
#define XNN_INIT_FLAG_QS8        (1 << 7)
// QU8 XNNPACK microkernels are available for use.
#define XNN_INIT_FLAG_QU8        (1 << 8)
// S8 XNNPACK microkernels are available for use.
#define XNN_INIT_FLAG_S8         (1 << 9)
// U8 XNNPACK microkernels are available for use.
#define XNN_INIT_FLAG_U8         (1 << 10)
// X8 XNNPACK microkernels are available for use.
#define XNN_INIT_FLAG_X8         (1 << 11)
// XX XNNPACK microkernels are available for use.
#define XNN_INIT_FLAG_XX         (1 << 12)
// VCVT XNNPACK microkernels are available for use.
#define XNN_INIT_FLAG_VCVT       (1 << 13)
// CHW XNNPACK microkernels are optimized for the host platform.
#define XNN_INIT_FLAG_CHW_OPT    (1 << 14)
492
493 struct xnn_parameters {
494 // Bitwise combination of XNN_INIT_FLAG_* flags
495 uint32_t init_flags;
496 struct xnn_allocator allocator;
497 size_t page_size;
498 struct {
499 struct gemm_parameters gemm;
500 struct dwconv_parameters dwconv[XNN_MAX_QC8_DWCONV_UKERNELS];
501 } qc8;
502 struct {
503 struct gemm_parameters gemm;
504 struct dwconv_parameters dwconv[XNN_MAX_QS8_DWCONV_UKERNELS];
505 struct gavgpool_parameters gavgpool;
506 struct vbinary_parameters vadd;
507 struct vbinary_parameters vmul;
508 struct vunary_parameters lrelu;
509 } qs8;
510 struct {
511 struct gemm_parameters gemm;
512 struct dwconv_parameters dwconv[XNN_MAX_QU8_DWCONV_UKERNELS];
513 struct avgpool_parameters avgpool;
514 struct gavgpool_parameters gavgpool;
515 struct vbinary_parameters vadd;
516 struct vbinary_parameters vmul;
517 struct vunary_parameters lrelu;
518 } qu8;
519 struct {
520 struct vunary_parameters clamp;
521 // Bilinear interpolation (2D).
522 struct ibilinear_parameters ibilinear;
523 struct maxpool_parameters maxpool;
524 } s8;
525 struct {
526 struct vunary_parameters clamp;
527 // Bilinear interpolation (2D).
528 struct ibilinear_parameters ibilinear;
529 struct maxpool_parameters maxpool;
530 xnn_u8_lut32norm_ukernel_function lut32norm;
531 xnn_u8_rmax_ukernel_function rmax;
532 } u8;
533 struct {
534 xnn_x8_lut_ukernel_function lut;
535 struct zip_parameters zip;
536 struct transpose_parameters transpose;
537 } x8;
538 struct {
539 struct transpose_parameters transpose;
540 } x16;
541 struct {
542 struct gemm_parameters gemm;
543 struct gemm_parameters gemm2;
544 struct dwconv_parameters dwconv[XNN_MAX_F16_DWCONV_UKERNELS];
545 struct avgpool_parameters avgpool;
546 struct pavgpool_parameters pavgpool;
547 struct gavgpool_parameters gavgpool;
548 struct maxpool_parameters maxpool;
549 // Bilinear interpolation (2D).
550 struct ibilinear_parameters ibilinear;
551 struct vunary_parameters abs;
552 struct vunary_parameters clamp;
553 struct vunary_parameters elu;
554 struct vunary_parameters hswish;
555 struct vunary_parameters lrelu;
556 struct vunary_parameters neg;
557 struct vunary_parameters rndne;
558 struct vunary_parameters rndz;
559 struct vunary_parameters rndu;
560 struct vunary_parameters rndd;
561 struct vunary_parameters sigmoid;
562 struct vunary_parameters sqr;
563 struct vunary_parameters sqrt;
564 struct prelu_parameters prelu;
565 struct vbinary_parameters vadd;
566 struct vbinary_parameters vdiv;
567 struct vbinary_parameters vmax;
568 struct vbinary_parameters vmin;
569 struct vbinary_parameters vmul;
570 struct vbinary_parameters vsub;
571 struct vbinary_parameters vsqrdiff;
572 struct vmulcaddc_parameters vmulcaddc;
573 struct raddstoreexpminusmax_parameters raddstoreexpminusmax;
574 xnn_rmax_ukernel_function rmax;
575 // Sparse Matrix-Dense Matrix Multiplication (NR=1 block).
576 struct spmm_parameters spmm;
577 // Direct 3x3 stride-2 Convolution with 3 input channels and HWC->CHW layout conversion.
578 struct conv_hwc2chw_parameters conv_hwc2chw_3x3c3s2;
579 // Direct 3x3 stride-1 Convolution with padding 1 on left and right in CHW layout.
580 struct dwconv2d_chw_parameters dwconv2d_chw_3x3;
581 // Direct 3x3 stride-2 Convolution with padding 1 on left and right in CHW layout.
582 struct dwconv2d_chw_parameters dwconv2d_chw_3x3s2;
583 // Direct 5x5 stride-1 Convolution with padding 2 on left and right in CHW layout.
584 struct dwconv2d_chw_parameters dwconv2d_chw_5x5;
585 // Direct 5x5 stride-2 Convolution with padding 2 on left and right in CHW layout.
586 struct dwconv2d_chw_parameters dwconv2d_chw_5x5s2;
587 // Global Average Pooling in CW layout.
588 struct gavgpool_cw_parameters gavgpool_cw;
589 // Bilinear interpolation (2D) in CHW layout.
590 struct ibilinear_chw_parameters ibilinear_chw;
591 } f16;
592 struct {
593 struct gemm_parameters gemm;
594 struct gemm_parameters gemm2;
595 struct dwconv_parameters dwconv[XNN_MAX_F32_DWCONV_UKERNELS];
596 struct avgpool_parameters avgpool;
597 struct pavgpool_parameters pavgpool;
598 struct gavgpool_parameters gavgpool;
599 struct maxpool_parameters maxpool;
600 struct argmaxpool_parameters argmaxpool[XNN_MAX_F32_ARGMAXPOOL_UKERNELS];
601 // Bilinear interpolation (2D).
602 struct ibilinear_parameters ibilinear;
603 struct vunary_parameters abs;
604 struct vunary_parameters clamp;
605 struct vunary_parameters elu;
606 struct vunary_parameters hswish;
607 struct vunary_parameters lrelu;
608 struct vunary_parameters neg;
609 struct vunary_parameters relu;
610 struct vunary_parameters rndne;
611 struct vunary_parameters rndz;
612 struct vunary_parameters rndu;
613 struct vunary_parameters rndd;
614 struct vunary_parameters sigmoid;
615 struct vunary_parameters sqr;
616 struct vunary_parameters sqrt;
617 struct prelu_parameters prelu;
618 struct vbinary_parameters vadd;
619 struct vbinary_parameters vdiv;
620 struct vbinary_parameters vmax;
621 struct vbinary_parameters vmin;
622 struct vbinary_parameters vmul;
623 struct vbinary_parameters vsub;
624 struct vbinary_parameters vsqrdiff;
625 struct vmulcaddc_parameters vmulcaddc;
626 struct raddstoreexpminusmax_parameters raddstoreexpminusmax;
627 xnn_rmax_ukernel_function rmax;
628 // Sparse Matrix-Dense Matrix Multiplication (NR=1 block).
629 struct spmm_parameters spmm;
630 // Sparse Matrix-Dense Matrix Multiplication (NR=2 block).
631 struct spmm_parameters spmm2;
632 // Sparse Matrix-Dense Matrix Multiplication (NR=4 block).
633 struct spmm_parameters spmm4;
634 // Direct 3x3 stride-2 Convolution with 3 input channels and HWC->CHW layout conversion.
635 struct conv_hwc2chw_parameters conv_hwc2chw_3x3c3s2;
636 // Direct 3x3 stride-1 Convolution with padding 1 on left and right in CHW layout.
637 struct dwconv2d_chw_parameters dwconv2d_chw_3x3;
638 // Direct 3x3 stride-2 Convolution with padding 1 on left and right in CHW layout.
639 struct dwconv2d_chw_parameters dwconv2d_chw_3x3s2;
640 // Direct 5x5 stride-1 Convolution with padding 2 on left and right in CHW layout.
641 struct dwconv2d_chw_parameters dwconv2d_chw_5x5;
642 // Direct 5x5 stride-2 Convolution with padding 2 on left and right in CHW layout.
643 struct dwconv2d_chw_parameters dwconv2d_chw_5x5s2;
644 // Global Average Pooling in CW layout.
645 struct gavgpool_cw_parameters gavgpool_cw;
646 // Bilinear interpolation (2D) in CHW layout.
647 struct ibilinear_chw_parameters ibilinear_chw;
648 } f32;
649 struct {
650 struct vunary_parameters f16_to_f32;
651 struct vunary_parameters f32_to_f16;
652 struct vunary_parameters f32_to_qs8;
653 struct vunary_parameters f32_to_qu8;
654 struct vunary_parameters qs8;
655 struct vunary_parameters qs8_to_f32;
656 struct vunary_parameters qu8;
657 struct vunary_parameters qu8_to_f32;
658 } vcvt;
659 struct {
660 xnn_unpool_ukernel_function unpool;
661 struct zip_parameters zip;
662 struct transpose_parameters transpose;
663 } x32;
664 struct {
665 xnn_vunary_ukernel_function copy;
666 struct fill_parameters fill;
667 struct pad_parameters pad;
668 struct transpose_parameters transpose;
669 } xx;
670 };
671
672 #ifdef __cplusplus
673 extern "C" XNN_INTERNAL struct xnn_parameters xnn_params;
674 #else
675 extern XNN_INTERNAL struct xnn_parameters xnn_params;
676 #endif
677