1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8
9 #pragma once
10
11 #include <stdbool.h>
12 #include <stddef.h>
13 #include <stdint.h>
14
15 #include <xnnpack.h>
16 #include <xnnpack/common.h>
17
// Parameter type for f16 micro-kernels that have no fused activation.
// Carries no data; it exists only so these kernels get a distinct pointer type.
struct xnn_f16_default_params {
  char _;  // Placeholder member: the C standard does not permit an empty struct.
};
22
// Parameter type for f16 micro-kernels with a fused ReLU-style activation.
// Carries no data; it only distinguishes the pointer type from other activations.
struct xnn_f16_relu_params {
  char _;  // Placeholder member: the C standard does not permit an empty struct.
};
27
// Scale plus min/max parameters; scaleminmax is used for gemm/igemm ukernels.
// NOTE(review): values are stored as raw uint16_t, presumably half-precision
// bit patterns - confirm against the code that initializes them.
struct xnn_f16_scaleminmax_params {
  uint16_t scale;
  uint16_t min;
  uint16_t max;
};
34
// Min/max pair for f16 micro-kernels.
// NOTE(review): values are stored as raw uint16_t, presumably half-precision
// bit patterns - confirm against the code that initializes them.
struct xnn_f16_minmax_params {
  uint16_t min;
  uint16_t max;
};
39
// Parameter type for f32 micro-kernels that have no fused activation.
// Carries no data; it exists only so these kernels get a distinct pointer type.
union xnn_f32_default_params {
  char _;  // Placeholder member: the C standard does not permit an empty union.
};
44
// Parameter type for f32 micro-kernels with a fused ReLU-style activation.
// Carries no data; it only distinguishes the pointer type from other activations.
union xnn_f32_relu_params {
  char _;  // Placeholder member: the C standard does not permit an empty union.
};
49
// Min/max pair for f32 micro-kernels, with one layout per target family.
union xnn_f32_minmax_params {
  // Always available: one float per bound.
  struct {
    float min;
    float max;
  } scalar;
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // x86 / x86-64 builds only: each bound is a 4-float array declared with XNN_ALIGN(16).
  struct {
    XNN_ALIGN(16) float min[4];
    XNN_ALIGN(16) float max[4];
  } sse;
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
};
62
// Parameters for f32 abs micro-kernels; payload exists only on some targets.
union xnn_f32_abs_params {
  char _;  // Placeholder member so the union is never empty (required by the C standard).
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // x86 / x86-64 builds only: 4-float mask array declared with XNN_ALIGN(16).
  struct {
    XNN_ALIGN(16) float nonsign_mask[4];
  } sse;
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
#if XNN_ARCH_WASMSIMD
  // WAsm SIMD builds only: a single float mask.
  struct {
    float nonsign_mask;
  } wasmsimd;
#endif  // XNN_ARCH_WASMSIMD
};
76
// Parameters for f32 neg micro-kernels; payload exists only on some targets.
union xnn_f32_neg_params {
  char _;  // Placeholder member so the union is never empty (required by the C standard).
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // x86 / x86-64 builds only: 4-float mask array declared with XNN_ALIGN(16).
  struct {
    XNN_ALIGN(16) float sign_mask[4];
  } sse;
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
#if XNN_ARCH_WASMSIMD
  // WAsm SIMD builds only: a single float mask.
  struct {
    float sign_mask;
  } wasmsimd;
#endif  // XNN_ARCH_WASMSIMD
};
90
// Parameters for f32 rnd micro-kernels; payload exists only on x86 targets.
union xnn_f32_rnd_params {
  char _;  // Placeholder member so the union is never empty (required by the C standard).
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // x86 / x86-64 builds only: two 4-float arrays declared with XNN_ALIGN(16).
  struct {
    XNN_ALIGN(16) float sign_mask[4];
    XNN_ALIGN(16) float one[4];
  } sse2;
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
};
100
// Parameters (prescale, alpha, beta) for f32 elu micro-kernels.
union xnn_f32_elu_params {
  // Always available: one float per coefficient.
  struct {
    float prescale;
    float alpha;
    float beta;
  } scalar;
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // x86 / x86-64 builds only: each coefficient is a 4-float array declared with XNN_ALIGN(16).
  struct {
    XNN_ALIGN(16) float prescale[4];
    XNN_ALIGN(16) float alpha[4];
    XNN_ALIGN(16) float beta[4];
  } sse;
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
};
115
// Slope parameter for f32 lrelu micro-kernels.
union xnn_f32_lrelu_params {
  // Always available: a single float slope.
  struct {
    float slope;
  } scalar;
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // x86 / x86-64 builds only: 4-float slope array declared with XNN_ALIGN(16).
  struct {
    XNN_ALIGN(16) float slope[4];
  } sse;
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
};
126
// Parameters for f32 sqrt micro-kernels; payload exists only on x86 targets.
union xnn_f32_sqrt_params {
  char _;  // Placeholder member so the union is never empty (required by the C standard).
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // x86 / x86-64 builds only: a single float constant named `half`.
  struct {
    float half;
  } fma;
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
};
135
136 union xnn_f32_chw_params {
137 struct {
138 XNN_ALIGN(16) int32_t mask_even[4]; // used by stride 2 kernels
139 XNN_ALIGN(16) int32_t mask_odd[4]; // used by stride 2 kernels
140 XNN_ALIGN(16) int32_t mask[4]; // used by stride 1 kernels
141 float min;
142 float max;
143 } scalar;
144 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
145 struct {
146 float min;
147 float max;
148 XNN_ALIGN(16) uint32_t mask_even[4]; // used by stride 2 kernels
149 XNN_ALIGN(16) uint32_t mask_odd[4]; // used by stride 2 kernels
150 XNN_ALIGN(16) uint32_t mask[4]; // used by stride 1 kernels
151 } neon;
152 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
153 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
154 struct {
155 XNN_ALIGN(16) float min[4];
156 XNN_ALIGN(16) float max[4];
157 XNN_ALIGN(16) uint32_t mask_even[4]; // used by stride 2 kernels
158 XNN_ALIGN(16) uint32_t mask_odd[4]; // used by stride 2 kernels
159 XNN_ALIGN(16) uint32_t mask[4]; // used by stride 1 kernels
160 } sse;
161 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
162 };
163
// Min/max pair for u8 micro-kernels, with one layout per target family.
union xnn_u8_minmax_params {
  // Always available: bounds widened to int32_t.
  struct {
    int32_t min;
    int32_t max;
  } scalar;
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // ARM / ARM64 builds only: one byte per bound.
  struct {
    uint8_t min;
    uint8_t max;
  } neon;
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // x86 / x86-64 builds only: each bound is a 16-byte array declared with XNN_ALIGN(16).
  struct {
    XNN_ALIGN(16) uint8_t min[16];
    XNN_ALIGN(16) uint8_t max[16];
  } sse2;
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
};
182
// Scale plus min/max parameters for f32 micro-kernels.
union xnn_f32_scaleminmax_params {
  // Always available: one float per value.
  struct {
    float scale;
    float min;
    float max;
  } scalar;
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // x86 / x86-64 builds only: each value is a 4-float array declared with XNN_ALIGN(16).
  struct {
    XNN_ALIGN(16) float scale[4];
    XNN_ALIGN(16) float min[4];
    XNN_ALIGN(16) float max[4];
  } sse2;
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
};
197
198 union xnn_f32_gavgpool_params {
199 struct {
200 XNN_ALIGN(16) int32_t mask[4];
201 float multiplier;
202 float output_min;
203 float output_max;
204 } scalar;
205 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
206 struct {
207 XNN_ALIGN(16) float multiplier[4];
208 XNN_ALIGN(16) float output_min[4];
209 XNN_ALIGN(16) float output_max[4];
210 XNN_ALIGN(16) uint32_t mask[4];
211 } sse;
212 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
213 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
214 struct {
215 XNN_ALIGN(16) float multiplier;
216 XNN_ALIGN(16) float output_min;
217 XNN_ALIGN(16) float output_max;
218 XNN_ALIGN(16) uint32_t mask[4];
219 } neon;
220 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 */
221 };
222
// Constants (sixth, three, six) for f16 hswish micro-kernels.
// NOTE(review): values are stored as raw uint16_t, presumably half-precision
// bit patterns - confirm against the code that initializes them.
struct xnn_f16_hswish_params {
  uint16_t sixth;
  uint16_t three;
  uint16_t six;
};
228
// Constants for f32 hswish micro-kernels.
// Note that the two layouts carry different constants (sixth/three/six vs. sixth/half/one).
union xnn_f32_hswish_params {
  // Always available: one float per constant.
  struct {
    float sixth;
    float three;
    float six;
  } scalar;
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // x86 / x86-64 builds only: each constant is a 4-float array declared with XNN_ALIGN(16).
  struct {
    XNN_ALIGN(16) float sixth[4];
    XNN_ALIGN(16) float half[4];
    XNN_ALIGN(16) float one[4];
  } sse;
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
};
243
// Parameters for qu8 gemm-family micro-kernels, with one layout per target family.
union xnn_qu8_gemm_params {
  // Always available: every value is a single 32-bit integer.
  struct {
    int32_t kernel_zero_point;
    int32_t multiplier;
    int32_t remainder_mask;
    int32_t remainder_threshold;
    uint32_t shift;
    int32_t output_min_less_zero_point;
    int32_t output_max_less_zero_point;
    int32_t output_zero_point;
  } scalar;
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // ARM / ARM64 builds only: compact per-value scalars.
  struct {
    int32_t kernel_zero_point;
    int32_t multiplier;
    int32_t right_shift;
    int16_t output_zero_point;
    uint8_t output_min;
    uint8_t output_max;
  } neon;
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // x86 / x86-64 builds only: every value is a 16-byte array declared with XNN_ALIGN(16).
  struct {
    XNN_ALIGN(16) int16_t kernel_zero_point[8];
    XNN_ALIGN(16) uint32_t multiplier[4];
    XNN_ALIGN(16) uint64_t rounding[2];
    XNN_ALIGN(16) int32_t remainder_mask[4];
    XNN_ALIGN(16) int32_t remainder_threshold[4];
    XNN_ALIGN(16) uint64_t shift[2];
    XNN_ALIGN(16) int16_t output_zero_point[8];
    XNN_ALIGN(16) uint8_t output_min[16];
    XNN_ALIGN(16) uint8_t output_max[16];
  } sse2;
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
};
279
// Parameters for qs8 gemm-family micro-kernels, with one layout per target family.
union xnn_qs8_gemm_params {
  // Always available: every value is a single 32-bit integer.
  struct {
    int32_t multiplier;
    int32_t remainder_mask;
    int32_t remainder_threshold;
    uint32_t shift;
    int32_t output_min_less_zero_point;
    int32_t output_max_less_zero_point;
    int32_t output_zero_point;
  } scalar;
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // ARM / ARM64 builds only: compact per-value scalars.
  struct {
    int32_t multiplier;
    int32_t right_shift;
    int16_t output_zero_point;
    int8_t output_min;
    int8_t output_max;
  } neon;
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // x86 / x86-64 builds only: every value is a 16-byte array declared with XNN_ALIGN(16).
  struct {
    XNN_ALIGN(16) uint32_t multiplier[4];
    XNN_ALIGN(16) uint64_t rounding[2];
    XNN_ALIGN(16) int32_t remainder_mask[4];
    XNN_ALIGN(16) int32_t remainder_threshold[4];
    XNN_ALIGN(16) uint64_t shift[2];
    XNN_ALIGN(16) int16_t output_zero_point[8];
    XNN_ALIGN(16) int16_t output_min[8];
    XNN_ALIGN(16) int16_t output_max[8];
  } sse2;
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
#if XNN_ARCH_WASMSIMD
  // WAsm SIMD builds only: 16-byte arrays, except `shift`, which is a plain int32_t.
  struct {
    XNN_ALIGN(16) int64_t multiplier[2];
    XNN_ALIGN(16) int64_t rounding[2];
    XNN_ALIGN(16) int32_t remainder_mask[4];
    XNN_ALIGN(16) int32_t remainder_threshold[4];
    int32_t shift;
    XNN_ALIGN(16) int16_t output_zero_point[8];
    XNN_ALIGN(16) int8_t output_min[16];
    XNN_ALIGN(16) int8_t output_max[16];
  } wasmsimd;
#endif  // XNN_ARCH_WASMSIMD
};
324
// Parameters for qs8 gemm "xw" micro-kernels, with one layout per target family.
// NOTE(review): "xw" presumably denotes a distinct packed-weights variant - confirm.
union xnn_qs8_gemm_xw_params {
  // Always available: every value is a single 32-bit integer.
  struct {
    int32_t multiplier;
    int32_t remainder_mask;
    int32_t remainder_threshold;
    uint32_t shift;
    int32_t output_min_less_zero_point;
    int32_t output_max_less_zero_point;
    int32_t output_zero_point;
  } scalar;
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // ARM / ARM64 builds only: compact per-value scalars.
  struct {
    int32_t multiplier;
    int32_t right_shift;
    int16_t output_zero_point;
    int8_t output_min;
    int8_t output_max;
  } neon;
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // x86 / x86-64 builds only: every value is a 16-byte array declared with XNN_ALIGN(16).
  struct {
    XNN_ALIGN(16) uint32_t multiplier[4];
    XNN_ALIGN(16) uint64_t rounding[2];
    XNN_ALIGN(16) int32_t remainder_mask[4];
    XNN_ALIGN(16) int32_t remainder_threshold[4];
    XNN_ALIGN(16) uint64_t shift[2];
    XNN_ALIGN(16) int16_t output_zero_point[8];
    XNN_ALIGN(16) int16_t output_min[8];
    XNN_ALIGN(16) int16_t output_max[8];
  } sse2;
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
#if XNN_ARCH_WASMSIMD
  // WAsm SIMD builds only: 16-byte arrays, except `shift`, which is a plain int32_t.
  struct {
    XNN_ALIGN(16) int64_t multiplier[2];
    XNN_ALIGN(16) int64_t rounding[2];
    XNN_ALIGN(16) int32_t remainder_mask[4];
    XNN_ALIGN(16) int32_t remainder_threshold[4];
    int32_t shift;
    XNN_ALIGN(16) int16_t output_zero_point[8];
    XNN_ALIGN(16) int8_t output_min[16];
    XNN_ALIGN(16) int8_t output_max[16];
  } wasmsimd;
#endif  // XNN_ARCH_WASMSIMD
};
369
// Parameters for qu8 add micro-kernels, with one layout per target family.
union xnn_qu8_add_params {
  // Always available: every value is a single 32-bit integer.
  struct {
    int32_t zero_point_product;
    uint32_t a_multiplier;
    uint32_t b_multiplier;
    uint32_t shift;
    int32_t remainder_mask;
    int32_t remainder_threshold;
    int32_t y_zero_point;
    int32_t y_min;
    int32_t y_max;
  } scalar;
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // ARM / ARM64 builds only: separate per-input zero points and compact scalars.
  struct {
    uint8_t a_zero_point;
    uint8_t b_zero_point;
    int16_t y_zero_point;
    int32_t a_multiplier;
    int32_t b_multiplier;
    int32_t right_shift;
    uint8_t y_min;
    uint8_t y_max;
  } neon;
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // x86 / x86-64 builds only: 16-byte arrays declared with XNN_ALIGN(16),
  // followed by plain 32-bit shift and multiplier values.
  struct {
    XNN_ALIGN(16) int32_t zero_point_product[4];
    XNN_ALIGN(16) uint16_t a_multiplier_lo[8];
    XNN_ALIGN(16) uint16_t a_multiplier_hi[8];
    XNN_ALIGN(16) uint16_t b_multiplier_lo[8];
    XNN_ALIGN(16) uint16_t b_multiplier_hi[8];
    XNN_ALIGN(16) int32_t remainder_mask[4];
    XNN_ALIGN(16) int32_t remainder_threshold[4];
    XNN_ALIGN(16) int16_t y_zero_point[8];
    XNN_ALIGN(16) uint8_t y_min[16];
    XNN_ALIGN(16) uint8_t y_max[16];
    uint32_t shift;
    uint32_t a_multiplier;
    uint32_t b_multiplier;
  } sse2;
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
};
412
// Parameters for qs8 add micro-kernels, with one layout per target family.
union xnn_qs8_add_params {
  // Always available: every value is a single 32-bit integer.
  struct {
    int32_t zero_point_product;
    int32_t x_multiplier;
    int32_t y_multiplier;
    uint32_t shift;
    int32_t remainder_mask;
    int32_t remainder_threshold;
    int32_t output_zero_point;
    int32_t output_min;
    int32_t output_max;
  } scalar;
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // ARM / ARM64 builds only: separate per-input zero points and compact scalars.
  struct {
    int8_t x_zero_point;
    int8_t y_zero_point;
    int16_t output_zero_point;
    int32_t x_multiplier;
    int32_t y_multiplier;
    int32_t right_shift;
    int8_t output_min;
    int8_t output_max;
  } neon;
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // x86 / x86-64 builds only: 16-byte arrays declared with XNN_ALIGN(16);
  // `shift` is a plain uint32_t placed between the array members.
  struct {
    XNN_ALIGN(16) int32_t zero_point_product[4];
    XNN_ALIGN(16) uint16_t x_multiplier_lo[8];
    XNN_ALIGN(16) uint16_t x_multiplier_hi[8];
    XNN_ALIGN(16) uint16_t y_multiplier_lo[8];
    XNN_ALIGN(16) uint16_t y_multiplier_hi[8];
    XNN_ALIGN(16) int32_t x_multiplier[4];
    XNN_ALIGN(16) int32_t y_multiplier[4];
    XNN_ALIGN(16) int32_t remainder_mask[4];
    XNN_ALIGN(16) int32_t remainder_threshold[4];
    uint32_t shift;
    XNN_ALIGN(16) int16_t output_zero_point[8];
    XNN_ALIGN(16) int16_t output_min[8];
    XNN_ALIGN(16) int16_t output_max[8];
  } sse2;
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
#if XNN_ARCH_WASMSIMD
  // WAsm SIMD builds only: 16-byte arrays, except `shift`, which is a plain int32_t.
  struct {
    XNN_ALIGN(16) int32_t zero_point_product[4];
    XNN_ALIGN(16) int32_t x_multiplier[4];
    XNN_ALIGN(16) int32_t y_multiplier[4];
    XNN_ALIGN(16) int32_t remainder_mask[4];
    XNN_ALIGN(16) int32_t remainder_threshold[4];
    int32_t shift;
    XNN_ALIGN(16) int16_t output_zero_point[8];
    XNN_ALIGN(16) int8_t output_min[16];
    XNN_ALIGN(16) int8_t output_max[16];
  } wasmsimd;
#endif  // XNN_ARCH_WASMSIMD
};
468
// Parameters for qu8 avgpool-family micro-kernels, with one layout per target family.
union xnn_qu8_avgpool_params {
  // Always available: 32-bit values plus a 64-bit rounding term.
  struct {
    int32_t bias;
    int32_t multiplier;
    int64_t rounding;
    uint32_t right_shift;
    int32_t output_min_less_zero_point;
    int32_t output_max_less_zero_point;
    int32_t output_zero_point;
  } scalar;
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // ARM / ARM64 builds only: the shift is stored as a 64-bit `left_shift`.
  struct {
    int32_t bias;
    int32_t multiplier;
    int64_t left_shift;
    int16_t output_zero_point;
    uint8_t output_min;
    uint8_t output_max;
  } neon;
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // x86 / x86-64 builds only: every value is a 16-byte array declared with XNN_ALIGN(16).
  struct {
    XNN_ALIGN(16) int32_t bias[4];
    XNN_ALIGN(16) uint32_t multiplier[4];
    XNN_ALIGN(16) uint64_t rounding[2];
    XNN_ALIGN(16) uint64_t right_shift[2];
    XNN_ALIGN(16) int16_t output_zero_point[8];
    XNN_ALIGN(16) uint8_t output_min[16];
    XNN_ALIGN(16) uint8_t output_max[16];
  } sse2;
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
};
501
// Parameters for qs8 avgpool-family micro-kernels, with one layout per target family.
union xnn_qs8_avgpool_params {
  // Always available: 32-bit values plus a 64-bit rounding term.
  struct {
    int32_t bias;
    int32_t multiplier;
    int64_t rounding;
    uint32_t shift;
    int32_t output_min_less_zero_point;
    int32_t output_max_less_zero_point;
    int32_t output_zero_point;
  } scalar;
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // ARM / ARM64 builds only: the shift is stored as a 64-bit `left_shift`.
  struct {
    int32_t bias;
    int32_t multiplier;
    int64_t left_shift;
    int16_t output_zero_point;
    int8_t output_min;
    int8_t output_max;
  } neon;
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // x86 / x86-64 builds only: every value is a 16-byte array declared with XNN_ALIGN(16).
  struct {
    XNN_ALIGN(16) int32_t bias[4];
    XNN_ALIGN(16) uint32_t multiplier[4];
    XNN_ALIGN(16) uint64_t rounding[2];
    XNN_ALIGN(16) uint64_t shift[2];
    XNN_ALIGN(16) int16_t output_zero_point[8];
    XNN_ALIGN(16) int16_t output_min[8];
    XNN_ALIGN(16) int16_t output_max[8];
  } sse2;
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
#if XNN_ARCH_WASMSIMD
  // WAsm SIMD builds only: 16-byte arrays, except `shift`, which is a plain int32_t.
  struct {
    XNN_ALIGN(16) int32_t bias[4];
    XNN_ALIGN(16) int64_t multiplier[2];
    XNN_ALIGN(16) int64_t rounding[2];
    int32_t shift;
    XNN_ALIGN(16) int16_t output_zero_point[8];
    XNN_ALIGN(16) int8_t output_min[16];
    XNN_ALIGN(16) int8_t output_max[16];
  } wasmsimd;
#endif  // XNN_ARCH_WASMSIMD
};
545
// Requantization parameters for qu8; the only layout declared here is `q31`.
union xnn_qu8_requantization_params {
  struct {
    int32_t multiplier;
    int32_t remainder_mask;
    int32_t remainder_threshold;
    uint32_t shift;
    int32_t min_less_zero_point;
    int32_t max_less_zero_point;
    int32_t zero_point;
  } q31;
};
557
// Requantization parameters for qs8; the only layout declared here is `q31`.
union xnn_qs8_requantization_params {
  struct {
    int32_t multiplier;
    int32_t remainder_mask;
    int32_t remainder_threshold;
    uint32_t shift;
    int32_t min_less_zero_point;
    int32_t max_less_zero_point;
    int32_t zero_point;
  } q31;
};
569
// Type-erased signature for ppmm micro-kernels: operands and params are void pointers.
typedef void (*xnn_ppmm_ukernel_function)(
    size_t mr,
    size_t nc,
    size_t kc,
    const void* a,
    const void* w,
    void* c,
    size_t cm_stride,
    size_t cn_stride,
    const void* params);
580
// Signature for f32 ppmm micro-kernels taking xnn_f32_minmax_params.
typedef void (*xnn_f32_ppmm_minmax_ukernel_function)(
    size_t mr,
    size_t nc,
    size_t kc,
    const float* a,
    const float* w,
    float* c,
    size_t cm_stride,
    size_t cn_stride,
    const union xnn_f32_minmax_params* params);
591
// Signature for f16 ppmm micro-kernels taking xnn_f16_scaleminmax_params.
// Operands are passed as void pointers.
typedef void (*xnn_f16_ppmm_ukernel_function)(
    size_t mr,
    size_t nc,
    size_t kc,
    const void* a,
    const void* w,
    void* c,
    size_t cm_stride,
    size_t cn_stride,
    const struct xnn_f16_scaleminmax_params* params);
602
// Type-erased signature for gemm micro-kernels: operands and params are void pointers.
typedef void (*xnn_gemm_ukernel_function)(
    size_t mr,
    size_t nr,
    size_t k,
    const void* a,
    size_t a_stride,
    const void* w,
    void* c,
    size_t cm_stride,
    size_t cn_stride,
    const void* params);
614
// Signature for f32 gemm micro-kernels taking xnn_f32_default_params.
typedef void (*xnn_f32_gemm_ukernel_function)(
    size_t mr,
    size_t nr,
    size_t k,
    const float* a,
    size_t a_stride,
    const float* w,
    float* c,
    size_t cm_stride,
    size_t cn_stride,
    const union xnn_f32_default_params* params);
626
// Signature for f32 gemm micro-kernels taking xnn_f32_relu_params.
typedef void (*xnn_f32_gemm_relu_ukernel_function)(
    size_t mr,
    size_t nr,
    size_t k,
    const float* a,
    size_t a_stride,
    const float* w,
    float* c,
    size_t cm_stride,
    size_t cn_stride,
    const union xnn_f32_relu_params* params);
638
// Signature for f32 gemm micro-kernels taking xnn_f32_minmax_params.
typedef void (*xnn_f32_gemm_minmax_ukernel_function)(
    size_t mr,
    size_t nr,
    size_t k,
    const float* a,
    size_t a_stride,
    const float* w,
    float* c,
    size_t cm_stride,
    size_t cn_stride,
    const union xnn_f32_minmax_params* params);
650
// Signature for f32 gemminc micro-kernels taking xnn_f32_minmax_params.
// Compared with the f32 gemm minmax signature it has an extra `acc` pointer before `params`.
typedef void (*xnn_f32_gemminc_minmax_ukernel_function)(
    size_t mr,
    size_t nr,
    size_t k,
    const float* a,
    size_t a_stride,
    const float* w,
    float* c,
    size_t cm_stride,
    size_t cn_stride,
    const float* acc,
    const union xnn_f32_minmax_params* params);
663
// Signature for f16 gemm micro-kernels taking xnn_f16_scaleminmax_params.
// Operands are passed as void pointers.
typedef void (*xnn_f16_gemm_minmax_ukernel_function)(
    size_t mr,
    size_t nr,
    size_t k,
    const void* a,
    size_t a_stride,
    const void* w,
    void* c,
    size_t cm_stride,
    size_t cn_stride,
    const struct xnn_f16_scaleminmax_params* params);
675
// Signature for f16 igemm micro-kernels taking xnn_f16_scaleminmax_params.
// `a` is an array of input pointers; operands are passed as void pointers.
typedef void (*xnn_f16_igemm_minmax_ukernel_function)(
    size_t mr,
    size_t nr,
    size_t kc,
    size_t ks,
    const void** a,
    const void* w,
    void* c,
    size_t cm_stride,
    size_t cn_stride,
    size_t a_offset,
    const void* zero,
    const struct xnn_f16_scaleminmax_params* params);
689
// Signature for qu8 gemm micro-kernels taking xnn_qu8_gemm_params.
// Inputs and outputs are uint8_t; the weights pointer `w` is untyped.
typedef void (*xnn_qu8_gemm_ukernel_function)(
    size_t mr,
    size_t nr,
    size_t k,
    const uint8_t* a,
    size_t a_stride,
    const void* w,
    uint8_t* c,
    size_t cm_stride,
    size_t cn_stride,
    const union xnn_qu8_gemm_params* params);
701
// Signature for qs8 gemm micro-kernels taking xnn_qs8_gemm_params.
// Inputs and outputs are int8_t; the weights pointer `w` is untyped.
typedef void (*xnn_qs8_gemm_ukernel_function)(
    size_t mr,
    size_t nr,
    size_t k,
    const int8_t* a,
    size_t a_stride,
    const void* w,
    int8_t* c,
    size_t cm_stride,
    size_t cn_stride,
    const union xnn_qs8_gemm_params* params);
713
// Signature for qs8 gemm "xw" micro-kernels taking xnn_qs8_gemm_xw_params.
// Inputs and outputs are int8_t; the weights pointer `w` is untyped.
typedef void (*xnn_qs8_gemm_xw_ukernel_function)(
    size_t mr,
    size_t nr,
    size_t k,
    const int8_t* a,
    size_t a_stride,
    const void* w,
    int8_t* c,
    size_t cm_stride,
    size_t cn_stride,
    const union xnn_qs8_gemm_xw_params* params);
725
// Type-erased signature for igemm micro-kernels: `a` is an array of input
// pointers, and all operands and params are void pointers.
typedef void (*xnn_igemm_ukernel_function)(
    size_t mr,
    size_t nr,
    size_t kc,
    size_t ks,
    const void** a,
    const void* w,
    void* c,
    size_t cm_stride,
    size_t cn_stride,
    size_t a_offset,
    const void* zero,
    const void* params);
739
// Signature for f32 igemm micro-kernels taking xnn_f32_default_params.
typedef void (*xnn_f32_igemm_ukernel_function)(
    size_t mr,
    size_t nr,
    size_t kc,
    size_t ks,
    const float** a,
    const float* w,
    float* c,
    size_t cm_stride,
    size_t cn_stride,
    size_t a_offset,
    const float* zero,
    const union xnn_f32_default_params* params);
753
// Signature for f32 igemm micro-kernels taking xnn_f32_relu_params.
typedef void (*xnn_f32_igemm_relu_ukernel_function)(
    size_t mr,
    size_t nr,
    size_t kc,
    size_t ks,
    const float** a,
    const float* w,
    float* c,
    size_t cm_stride,
    size_t cn_stride,
    size_t a_offset,
    const float* zero,
    const union xnn_f32_relu_params* params);
767
// Signature for f32 igemm micro-kernels taking xnn_f32_minmax_params.
typedef void (*xnn_f32_igemm_minmax_ukernel_function)(
    size_t mr,
    size_t nr,
    size_t kc,
    size_t ks,
    const float** a,
    const float* w,
    float* c,
    size_t cm_stride,
    size_t cn_stride,
    size_t a_offset,
    const float* zero,
    const union xnn_f32_minmax_params* params);
781
// Signature for qu8 igemm micro-kernels; reuses xnn_qu8_gemm_params.
// Inputs and outputs are uint8_t; the weights pointer `w` is untyped.
typedef void (*xnn_qu8_igemm_ukernel_function)(
    size_t mr,
    size_t nr,
    size_t kc,
    size_t ks,
    const uint8_t** a,
    const void* w,
    uint8_t* c,
    size_t cm_stride,
    size_t cn_stride,
    size_t a_offset,
    const uint8_t* zero,
    const union xnn_qu8_gemm_params* params);
795
// Signature for qs8 igemm micro-kernels; reuses xnn_qs8_gemm_params.
// Inputs and outputs are int8_t; the weights pointer `w` is untyped.
typedef void (*xnn_qs8_igemm_ukernel_function)(
    size_t mr,
    size_t nr,
    size_t kc,
    size_t ks,
    const int8_t** a,
    const void* w,
    int8_t* c,
    size_t cm_stride,
    size_t cn_stride,
    size_t a_offset,
    const int8_t* zero,
    const union xnn_qs8_gemm_params* params);
809
// Type-erased signature for conv_hwc micro-kernels: buffers and params are void pointers.
typedef void (*xnn_conv_hwc_ukernel_function)(
    size_t input_height,
    size_t input_width,
    size_t output_y_start,
    size_t output_y_end,
    const void* input,
    const void* zero,
    const void* weights,
    void* output,
    size_t input_padding_top,
    size_t output_channels,
    size_t output_height_stride,
    size_t output_width_stride,
    const void* params);
824
// Signature for f32 conv_hwc micro-kernels taking xnn_f32_minmax_params.
typedef void (*xnn_f32_conv_hwc_ukernel_function)(
    size_t input_height,
    size_t input_width,
    size_t output_y_start,
    size_t output_y_end,
    const float* input,
    const float* zero,
    const float* weights,
    float* output,
    size_t input_padding_top,
    size_t output_channels,
    size_t output_height_stride,
    size_t output_width_stride,
    const union xnn_f32_minmax_params* params);
839
// Type-erased signature for conv_hwc2chw micro-kernels: buffers and params are void pointers.
// The last stride parameter is `output_channel_stride` (conv_hwc uses `output_width_stride`).
typedef void (*xnn_conv_hwc2chw_ukernel_function)(
    size_t input_height,
    size_t input_width,
    size_t output_y_start,
    size_t output_y_end,
    const void* input,
    const void* zero,
    const void* weights,
    void* output,
    size_t input_padding_top,
    size_t output_channels,
    size_t output_height_stride,
    size_t output_channel_stride,
    const void* params);
854
// Signature for f32 conv_hwc2chw micro-kernels taking xnn_f32_minmax_params.
typedef void (*xnn_f32_conv_hwc2chw_ukernel_function)(
    size_t input_height,
    size_t input_width,
    size_t output_y_start,
    size_t output_y_end,
    const float* input,
    const float* zero,
    const float* weights,
    float* output,
    size_t input_padding_top,
    size_t output_channels,
    size_t output_height_stride,
    size_t output_channel_stride,
    const union xnn_f32_minmax_params* params);
869
// Type-erased signature for spmm micro-kernels: data buffers and params are
// void pointers; the two index maps are typed (int32_t and uint32_t).
typedef void (*xnn_spmm_ukernel_function)(
    size_t batch_size,
    size_t output_channels,
    const void* input,
    const void* weights,
    const int32_t* widx_dmap,
    const uint32_t* nidx_nnzmap,
    void* output,
    size_t output_stride,
    const void* params);
880
// Signature for f16 spmm micro-kernels taking xnn_f16_scaleminmax_params.
// Data buffers are passed as void pointers.
typedef void (*xnn_f16_spmm_minmax_ukernel_function)(
    size_t batch_size,
    size_t output_channels,
    const void* input,
    const void* weights,
    const int32_t* widx_dmap,
    const uint32_t* nidx_nnzmap,
    void* output,
    size_t output_stride,
    const struct xnn_f16_scaleminmax_params* params);
891
// Signature for f32 spmm micro-kernels taking xnn_f32_minmax_params.
typedef void (*xnn_f32_spmm_minmax_ukernel_function)(
    size_t batch_size,
    size_t output_channels,
    const float* input,
    const float* weights,
    const int32_t* widx_dmap,
    const uint32_t* nidx_nnzmap,
    float* output,
    size_t output_stride,
    const union xnn_f32_minmax_params* params);
902
// Type-erased signature for packx micro-kernels: source `x` and destination `y` are void pointers.
typedef void (*xnn_packx_ukernel_function)(
    size_t m,
    size_t k,
    const void* x,
    size_t x_stride,
    void* y);
909
// Signature for x32 packx micro-kernels: source and destination are uint32_t buffers.
typedef void (*xnn_x32_packx_ukernel_function)(
    size_t m,
    size_t k,
    const uint32_t* x,
    size_t x_stride,
    uint32_t* y);
916
// Type-erased signature for fill micro-kernels: output and fill value are void pointers.
typedef void (*xnn_fill_ukernel_function)(
    size_t rows,
    size_t channels,
    void* output,
    size_t output_stride,
    const void* fill_value);
923
// Signature for x32 fill micro-kernels: output and fill value are uint32_t pointers.
typedef void (*xnn_x32_fill_ukernel_function)(
    size_t rows,
    size_t channels,
    uint32_t* output,
    size_t output_stride,
    const uint32_t* fill_value);
930
// Type-erased signature for depthtospace2d chw2hwc micro-kernels: input and output are void pointers.
typedef void (*xnn_depthtospace2d_chw2hwc_ukernel_function)(
    size_t output_channels,
    size_t input_height,
    size_t input_width,
    size_t block_size,
    const void* input,
    void* output,
    size_t output_channels_stride);
939
// Signature for x32 depthtospace2d chw2hwc micro-kernels: input and output are uint32_t buffers.
typedef void (*xnn_x32_depthtospace2d_chw2hwc_ukernel_function)(
    size_t output_channels,
    size_t input_height,
    size_t input_width,
    size_t block_size,
    const uint32_t* input,
    uint32_t* output,
    size_t output_channel_stride);
948
// Type-erased signature for pad micro-kernels: fill value, input and output are void pointers.
typedef void (*xnn_pad_ukernel_function)(
    size_t rows,
    size_t channels,
    size_t pre_padding,
    size_t post_padding,
    const void* fill_value,
    const void* input,
    size_t input_stride,
    void* output,
    size_t output_stride);
959
// Signature for x32 pad micro-kernels: fill value, input and output are uint32_t pointers.
typedef void (*xnn_x32_pad_ukernel_function)(
    size_t rows,
    size_t channels,
    size_t pre_padding,
    size_t post_padding,
    const uint32_t* fill_value,
    const uint32_t* input,
    size_t input_stride,
    uint32_t* output,
    size_t output_stride);
970
// Type-erased signature for unpool micro-kernels: `input` is a void pointer,
// `index` is a uint32_t array and `output` is an array of void pointers.
typedef void (*xnn_unpool_ukernel_function)(
    size_t p,
    size_t c,
    uint32_t f,
    const void* input,
    const uint32_t* index,
    void** output);
978
// Signature for x32 unpool micro-kernels: input, index and output pointers are all uint32_t-typed.
typedef void (*xnn_x32_unpool_ukernel_function)(
    size_t p,
    size_t c,
    uint32_t f,
    const uint32_t* input,
    const uint32_t* index,
    uint32_t** output);
986
// Type-erased signature for zipc micro-kernels: source `x` and destination `y` are void pointers.
typedef void (*xnn_zipc_ukernel_function)(
    size_t n,
    const void* x,
    void* y);
991
// Signature for x8 zipc micro-kernels: source and destination are uint8_t buffers.
typedef void (*xnn_x8_zipc_ukernel_function)(
    size_t n,
    const uint8_t* x,
    uint8_t* y);
996
// Signature for x32 zipc micro-kernels: source and destination are uint32_t buffers.
typedef void (*xnn_x32_zipc_ukernel_function)(
    size_t n,
    const uint32_t* x,
    uint32_t* y);
1001
// Type-erased signature for zipv micro-kernels: like zipc, plus a second size argument `m`.
typedef void (*xnn_zipv_ukernel_function)(
    size_t n,
    size_t m,
    const void* x,
    void* y);
1007
// Signature for x8 zipv micro-kernels: source and destination are uint8_t buffers.
typedef void (*xnn_x8_zipv_ukernel_function)(
    size_t n,
    size_t m,
    const uint8_t* x,
    uint8_t* y);
1013
// Signature for x32 zipv micro-kernels: source and destination are uint32_t buffers.
typedef void (*xnn_x32_zipv_ukernel_function)(
    size_t n,
    size_t m,
    const uint32_t* x,
    uint32_t* y);
1019
// Signature for x8 lut micro-kernels: input `x`, table `t` and output `y` are uint8_t buffers.
typedef void (*xnn_x8_lut_ukernel_function)(
    size_t n,
    const uint8_t* x,
    const uint8_t* t,
    uint8_t* y);
1025
// Type-erased signature for dwconv2d chw micro-kernels: buffers and params are void pointers.
typedef void (*xnn_dwconv2d_chw_ukernel_function)(
    size_t input_height,
    size_t input_width,
    const void* input,
    const void* weights,
    const void* zero,
    void* output,
    uint32_t padding_top,
    const void* params);
1035
// Signature for f32 dwconv2d chw micro-kernels taking xnn_f32_chw_params.
typedef void (*xnn_f32_dwconv2d_chw_ukernel_function)(
    size_t input_height,
    size_t input_width,
    const float* input,
    const float* weights,
    const float* zero,
    float* output,
    uint32_t padding_top,
    const union xnn_f32_chw_params* params);
1045
// Type-erased signature for unipass dwconv micro-kernels: `input` is an array
// of input pointers, and all buffers and params are void pointers.
typedef void (*xnn_dwconv_unipass_ukernel_function)(
    size_t channels,
    size_t output_width,
    const void** input,
    const void* weights,
    void* output,
    size_t input_stride,
    size_t output_increment,
    size_t input_offset,
    const void* zero,
    const void* params);
1057
// Signature for f32 unipass dwconv micro-kernels taking xnn_f32_default_params.
typedef void (*xnn_f32_dwconv_unipass_ukernel_function)(
    size_t channels,
    size_t output_width,
    const float** input,
    const float* weights,
    float* output,
    size_t input_stride,
    size_t output_increment,
    size_t input_offset,
    const float* zero,
    const union xnn_f32_default_params* params);
1069
// Signature for f32 unipass dwconv micro-kernels taking xnn_f32_minmax_params.
typedef void (*xnn_f32_dwconv_minmax_unipass_ukernel_function)(
    size_t channels,
    size_t output_width,
    const float** input,
    const float* weights,
    float* output,
    size_t input_stride,
    size_t output_increment,
    size_t input_offset,
    const float* zero,
    const union xnn_f32_minmax_params* params);
1081
// Signature for f16 unipass dwconv micro-kernels taking xnn_f16_minmax_params.
// Buffers are passed as void pointers.
typedef void (*xnn_f16_dwconv_minmax_unipass_ukernel_function)(
    size_t channels,
    size_t output_width,
    const void** input,
    const void* weights,
    void* output,
    size_t input_stride,
    size_t output_increment,
    size_t input_offset,
    const void* zero,
    const struct xnn_f16_minmax_params* params);
1093
// Signature for qu8 unipass dwconv micro-kernels; reuses xnn_qu8_gemm_params.
// Inputs and outputs are uint8_t; the weights pointer is untyped.
typedef void (*xnn_qu8_dwconv_minmax_unipass_ukernel_function)(
    size_t channels,
    size_t output_width,
    const uint8_t** input,
    const void* weights,
    uint8_t* output,
    size_t input_stride,
    size_t output_increment,
    size_t input_offset,
    const uint8_t* zero,
    const union xnn_qu8_gemm_params* params);
1105
// Signature for qs8 unipass dwconv micro-kernels; reuses xnn_qs8_gemm_params.
// Inputs and outputs are int8_t; the weights pointer is untyped.
typedef void (*xnn_qs8_dwconv_minmax_unipass_ukernel_function)(
    size_t channels,
    size_t output_width,
    const int8_t** input,
    const void* weights,
    int8_t* output,
    size_t input_stride,
    size_t output_increment,
    size_t input_offset,
    const int8_t* zero,
    const union xnn_qs8_gemm_params* params);
1117
// Type-erased signature for multipass dwconv micro-kernels. Same as the
// unipass signature plus a `buffer` pointer placed before `output`.
typedef void (*xnn_dwconv_multipass_ukernel_function)(
    size_t channels,
    size_t output_width,
    const void** input,
    const void* weights,
    void* buffer,
    void* output,
    size_t input_stride,
    size_t output_increment,
    size_t input_offset,
    const void* zero,
    const void* params);
1130
// Signature for f32 ibilinear micro-kernels: `input` is an array of float pointers.
typedef void (*xnn_f32_ibilinear_ukernel_function)(
    size_t output_pixels,
    size_t channels,
    const float** input,
    size_t input_offset,
    const float* weights,
    float* output,
    size_t output_increment);
1139
// Signature for f32 ibilinear chw micro-kernels. The trailing size argument is
// `input_increment` (the non-chw variant names it `output_increment`).
typedef void (*xnn_f32_ibilinear_chw_ukernel_function)(
    size_t output_pixels,
    size_t channels,
    const float** input,
    size_t input_offset,
    const float* weights,
    float* output,
    size_t input_increment);
1148
// Type-erased signature for ibilinear micro-kernels: buffers are void pointers.
typedef void (*xnn_ibilinear_ukernel_function)(
    size_t output_pixels,
    size_t channels,
    const void** input,
    size_t input_offset,
    const void* weights,
    void* output,
    size_t output_increment);
1157
// Type-erased signature for ibilinear chw micro-kernels: buffers are void pointers.
// The trailing size argument is `input_increment`.
typedef void (*xnn_ibilinear_chw_ukernel_function)(
    size_t output_pixels,
    size_t channels,
    const void** input,
    size_t input_offset,
    const void* weights,
    void* output,
    size_t input_increment);
1166
// Type-erased signature for unipass gavgpool micro-kernels: buffers and params are void pointers.
typedef void (*xnn_gavgpool_unipass_ukernel_function)(
    size_t rows,
    size_t channels,
    const void* input,
    size_t input_stride,
    const void* zero,
    void* output,
    const void* params);
1175
// Signature for f16 unipass gavgpool micro-kernels taking xnn_f16_scaleminmax_params.
// Buffers are passed as void pointers.
typedef void (*xnn_f16_gavgpool_minmax_unipass_ukernel_function)(
    size_t rows,
    size_t channels,
    const void* input,
    size_t input_stride,
    const void* zero,
    void* output,
    const struct xnn_f16_scaleminmax_params* params);
1184
// Signature for f32 unipass gavgpool micro-kernels taking xnn_f32_scaleminmax_params.
typedef void (*xnn_f32_gavgpool_minmax_unipass_ukernel_function)(
    size_t rows,
    size_t channels,
    const float* input,
    size_t input_stride,
    const float* zero,
    float* output,
    const union xnn_f32_scaleminmax_params* params);
1193
// Signature for qu8 unipass gavgpool micro-kernels taking xnn_qu8_avgpool_params.
typedef void (*xnn_qu8_gavgpool_minmax_unipass_ukernel_function)(
    size_t rows,
    size_t channels,
    const uint8_t* input,
    size_t input_stride,
    const uint8_t* zero,
    uint8_t* output,
    const union xnn_qu8_avgpool_params* params);
1202
// Signature for qs8 unipass gavgpool micro-kernels taking xnn_qs8_avgpool_params.
typedef void (*xnn_qs8_gavgpool_minmax_unipass_ukernel_function)(
    size_t rows,
    size_t channels,
    const int8_t* input,
    size_t input_stride,
    const int8_t* zero,
    int8_t* output,
    const union xnn_qs8_avgpool_params* params);
1211
// Type-erased signature for multipass gavgpool micro-kernels. Same as the
// unipass signature plus a `buffer` pointer placed before `output`.
typedef void (*xnn_gavgpool_multipass_ukernel_function)(
    size_t rows,
    size_t channels,
    const void* input,
    size_t input_stride,
    const void* zero,
    void* buffer,
    void* output,
    const void* params);
1221
1222 typedef void (*xnn_f16_gavgpool_minmax_multipass_ukernel_function)(
1223 size_t rows,
1224 size_t channels,
1225 const void* input,
1226 size_t input_stride,
1227 const void* zero,
1228 void* buffer,
1229 void* output,
1230 const struct xnn_f16_scaleminmax_params* params);
1231
1232 typedef void (*xnn_f32_gavgpool_minmax_multipass_ukernel_function)(
1233 size_t rows,
1234 size_t channels,
1235 const float* input,
1236 size_t input_stride,
1237 const float* zero,
1238 float* buffer,
1239 float* output,
1240 const union xnn_f32_scaleminmax_params* params);
1241
1242 typedef void (*xnn_qu8_gavgpool_minmax_multipass_ukernel_function)(
1243 size_t rows,
1244 size_t channels,
1245 const uint8_t* input,
1246 size_t input_stride,
1247 const uint8_t* zero,
1248 int32_t* buffer,
1249 uint8_t* output,
1250 const union xnn_qu8_avgpool_params* params);
1251
1252 typedef void (*xnn_qs8_gavgpool_minmax_multipass_ukernel_function)(
1253 size_t rows,
1254 size_t channels,
1255 const int8_t* input,
1256 size_t input_stride,
1257 const int8_t* zero,
1258 int32_t* buffer,
1259 int8_t* output,
1260 const union xnn_qs8_avgpool_params* params);
1261
// Signature of a global average pooling micro-kernel for the CW layout with
// an untyped parameter block. Note that, unlike most generic signatures in
// this file, the data pointers here are already `float`.
typedef void (*xnn_gavgpool_cw_ukernel_function)(
    size_t elements,
    size_t channels,
    const float* input,
    float* output,
    const void* params);

// F32 variant: identical data arguments, parameters are
// `union xnn_f32_gavgpool_params`.
typedef void (*xnn_f32_gavgpool_cw_ukernel_function)(
    size_t elements,
    size_t channels,
    const float* input,
    float* output,
    const union xnn_f32_gavgpool_params* params);
1275
// Type-erased signature of a single-pass average pooling micro-kernel.
// Input is supplied indirectly as an array of pointers accompanied by an
// offset argument, and both an input and an output increment are passed.
typedef void (*xnn_avgpool_unipass_ukernel_function)(
    size_t output_pixels,
    size_t kernel_elements,
    size_t channels,
    const void** input,
    size_t input_offset,
    const void* zero,
    void* output,
    size_t input_increment,
    size_t output_increment,
    const void* params);

// F32 variant: `float` data, parameters are `union xnn_f32_scaleminmax_params`.
typedef void (*xnn_f32_avgpool_minmax_unipass_ukernel_function)(
    size_t output_pixels,
    size_t kernel_elements,
    size_t channels,
    const float** input,
    size_t input_offset,
    const float* zero,
    float* output,
    size_t input_increment,
    size_t output_increment,
    const union xnn_f32_scaleminmax_params* params);

// QU8 variant: `uint8_t` data, parameters are `union xnn_qu8_avgpool_params`.
typedef void (*xnn_qu8_avgpool_minmax_unipass_ukernel_function)(
    size_t output_pixels,
    size_t kernel_elements,
    size_t channels,
    const uint8_t** input,
    size_t input_offset,
    const uint8_t* zero,
    uint8_t* output,
    size_t input_increment,
    size_t output_increment,
    const union xnn_qu8_avgpool_params* params);
1311
// Type-erased signature of a multi-pass average pooling micro-kernel.
// Compared with the unipass signature it takes an additional scratch `buffer`
// argument placed before `output`.
typedef void (*xnn_avgpool_multipass_ukernel_function)(
    size_t output_pixels,
    size_t kernel_elements,
    size_t channels,
    const void** input,
    size_t input_offset,
    const void* zero,
    void* buffer,
    void* output,
    size_t input_increment,
    size_t output_increment,
    const void* params);

// F32 variant: `float` data and `float` buffer.
typedef void (*xnn_f32_avgpool_minmax_multipass_ukernel_function)(
    size_t output_pixels,
    size_t kernel_elements,
    size_t channels,
    const float** input,
    size_t input_offset,
    const float* zero,
    float* buffer,
    float* output,
    size_t input_increment,
    size_t output_increment,
    const union xnn_f32_scaleminmax_params* params);

// QU8 variant: `uint8_t` data with an `int32_t` buffer.
typedef void (*xnn_qu8_avgpool_minmax_multipass_ukernel_function)(
    size_t output_pixels,
    size_t kernel_elements,
    size_t channels,
    const uint8_t** input,
    size_t input_offset,
    const uint8_t* zero,
    int32_t* buffer,
    uint8_t* output,
    size_t input_increment,
    size_t output_increment,
    const union xnn_qu8_avgpool_params* params);
1350
// Type-erased signature of a single-pass PAVGPOOL micro-kernel. It matches the
// average pooling unipass signature except for the extra `multiplier` array
// passed after `zero`.
typedef void (*xnn_pavgpool_unipass_ukernel_function)(
    size_t output_pixels,
    size_t kernel_elements,
    size_t channels,
    const void** input,
    size_t input_offset,
    const void* zero,
    const void* multiplier,
    void* output,
    size_t input_increment,
    size_t output_increment,
    const void* params);

// F32 variant: `float` data and multiplier, parameters are
// `union xnn_f32_minmax_params`.
typedef void (*xnn_f32_pavgpool_minmax_unipass_ukernel_function)(
    size_t output_pixels,
    size_t kernel_elements,
    size_t channels,
    const float** input,
    size_t input_offset,
    const float* zero,
    const float* multiplier,
    float* output,
    size_t input_increment,
    size_t output_increment,
    const union xnn_f32_minmax_params* params);
1376
// Type-erased signature of a multi-pass PAVGPOOL micro-kernel. Compared with
// the unipass signature it takes an additional scratch `buffer` argument
// placed between `multiplier` and `output`.
typedef void (*xnn_pavgpool_multipass_ukernel_function)(
    size_t output_pixels,
    size_t kernel_elements,
    size_t channels,
    const void** input,
    size_t input_offset,
    const void* zero,
    const void* multiplier,
    void* buffer,
    void* output,
    size_t input_increment,
    size_t output_increment,
    const void* params);

// F32 variant: `float` data, multiplier and buffer, parameters are
// `union xnn_f32_minmax_params`.
typedef void (*xnn_f32_pavgpool_minmax_multipass_ukernel_function)(
    size_t output_pixels,
    size_t kernel_elements,
    size_t channels,
    const float** input,
    size_t input_offset,
    const float* zero,
    const float* multiplier,
    float* buffer,
    float* output,
    size_t input_increment,
    size_t output_increment,
    const union xnn_f32_minmax_params* params);
1404
// Type-erased signature of a max pooling micro-kernel. Input is supplied
// indirectly as an array of pointers accompanied by an offset argument; there
// is no `zero` argument, unlike the average pooling signatures.
typedef void (*xnn_maxpool_ukernel_function)(
    size_t output_pixels,
    size_t kernel_elements,
    size_t channels,
    const void** input,
    size_t input_offset,
    void* output,
    size_t input_increment,
    size_t output_increment,
    const void* params);

// F32 variant: `float` data, parameters are `union xnn_f32_minmax_params`.
typedef void (*xnn_f32_maxpool_ukernel_function)(
    size_t output_pixels,
    size_t kernel_elements,
    size_t channels,
    const float** input,
    size_t input_offset,
    float* output,
    size_t input_increment,
    size_t output_increment,
    const union xnn_f32_minmax_params* params);

// U8 variant: `uint8_t` data, parameters are `union xnn_u8_minmax_params`.
typedef void (*xnn_u8_maxpool_ukernel_function)(
    size_t output_pixels,
    size_t kernel_elements,
    size_t channels,
    const uint8_t** input,
    size_t input_offset,
    uint8_t* output,
    size_t input_increment,
    size_t output_increment,
    const union xnn_u8_minmax_params* params);
1437
// Type-erased signature of a single-pass argmax pooling micro-kernel. Besides
// the pooled `output` it writes a parallel array of `uint32_t` indices, and it
// takes no parameter block.
typedef void (*xnn_argmaxpool_unipass_ukernel_function)(
    size_t output_pixels,
    size_t kernel_elements,
    size_t channels,
    const void** input,
    size_t input_offset,
    void* output,
    uint32_t* index,
    size_t input_increment,
    size_t output_increment);

// F32 variant: `float` data, same `uint32_t` index output.
typedef void (*xnn_f32_argmaxpool_unipass_ukernel_function)(
    size_t output_pixels,
    size_t kernel_elements,
    size_t channels,
    const float** input,
    size_t input_offset,
    float* output,
    uint32_t* index,
    size_t input_increment,
    size_t output_increment);
1459
// Type-erased signature of a multi-pass argmax pooling micro-kernel. Compared
// with the unipass signature it takes two extra scratch arguments before
// `output`: an accumulation buffer and a `uint32_t` index buffer.
typedef void (*xnn_argmaxpool_multipass_ukernel_function)(
    size_t output_pixels,
    size_t kernel_elements,
    size_t channels,
    const void** input,
    size_t input_offset,
    void* accumulation_buffer,
    uint32_t* index_buffer,
    void* output,
    uint32_t* index,
    size_t input_increment,
    size_t output_increment);

// F32 variant: `float` data and `float` accumulation buffer.
typedef void (*xnn_f32_argmaxpool_multipass_ukernel_function)(
    size_t output_pixels,
    size_t kernel_elements,
    size_t channels,
    const float** input,
    size_t input_offset,
    float* accumulation_buffer,
    uint32_t* index_buffer,
    float* output,
    uint32_t* index,
    size_t input_increment,
    size_t output_increment);
1485
// Type-erased signature shared by unary element-wise micro-kernels: a size
// `n`, an input `x`, an output `y`, and an untyped parameter block. The typed
// signatures below differ only in pointer and parameter types.
typedef void (*xnn_univector_ukernel_function)(
    size_t n,
    const void* x,
    void* y,
    const void* params);

// F16 clamp: untyped data, parameters are `struct xnn_f16_minmax_params`.
typedef void (*xnn_f16_clamp_ukernel_function)(
    size_t n,
    const void* x,
    void* y,
    const struct xnn_f16_minmax_params* params);

// F32 clamp: `float` data, parameters are `union xnn_f32_minmax_params`.
typedef void (*xnn_f32_clamp_ukernel_function)(
    size_t n,
    const float* x,
    float* y,
    const union xnn_f32_minmax_params* params);

// U8 clamp: `uint8_t` data, parameters are `union xnn_u8_minmax_params`.
typedef void (*xnn_u8_clamp_ukernel_function)(
    size_t n,
    const uint8_t* x,
    uint8_t* y,
    const union xnn_u8_minmax_params* params);

// F16 ReLU: untyped data, parameters are `struct xnn_f16_relu_params`.
typedef void (*xnn_f16_relu_ukernel_function)(
    size_t n,
    const void* x,
    void* y,
    const struct xnn_f16_relu_params* params);

// F32 ReLU: `float` data, parameters are `union xnn_f32_relu_params`.
typedef void (*xnn_f32_relu_ukernel_function)(
    size_t n,
    const float* x,
    float* y,
    const union xnn_f32_relu_params* params);

// F16 HSwish: untyped data, parameters are `struct xnn_f16_hswish_params`.
typedef void (*xnn_f16_hswish_ukernel_function)(
    size_t n,
    const void* x,
    void* y,
    const struct xnn_f16_hswish_params* params);

// F32 HSwish: `float` data, parameters are `union xnn_f32_hswish_params`.
typedef void (*xnn_f32_hswish_ukernel_function)(
    size_t n,
    const float* x,
    float* y,
    const union xnn_f32_hswish_params* params);
1533
// Type-erased signature of an RMAX micro-kernel: a size `n`, an input `x`,
// and an output location `y`; no parameter block.
typedef void (*xnn_rmax_ukernel_function)(
    size_t n,
    const void* x,
    void* y);

// U8 variant: `uint8_t` input and output.
typedef void (*xnn_u8_rmax_ukernel_function)(
    size_t n,
    const uint8_t* x,
    uint8_t* y);

// F32 variant: `float` input and output.
typedef void (*xnn_f32_rmax_ukernel_function)(
    size_t n,
    const float* x,
    float* y);
1548
// Signature of a U8 LUT32NORM micro-kernel: `uint8_t` input `x` and output
// `y` of size `n`, plus a table `t` of `uint32_t` entries.
typedef void (*xnn_u8_lut32norm_ukernel_function)(
    size_t n,
    const uint8_t* x,
    const uint32_t* t,
    uint8_t* y);
1554
// Type-erased signature of a vector addition micro-kernel: two inputs `a` and
// `b`, an output `y`, and an untyped parameter block.
typedef void (*xnn_vadd_ukernel_function)(
    size_t n,
    const void* a,
    const void* b,
    void* y,
    const void* params);

// QU8 variant: `uint8_t` data, parameters are `union xnn_qu8_add_params`.
typedef void (*xnn_qu8_vadd_minmax_ukernel_function)(
    size_t n,
    const uint8_t* input_x,
    const uint8_t* input_y,
    uint8_t* output,
    const union xnn_qu8_add_params* params);

// QS8 variant: `int8_t` data, parameters are `union xnn_qs8_add_params`.
typedef void (*xnn_qs8_vadd_minmax_ukernel_function)(
    size_t n,
    const int8_t* input_x,
    const int8_t* input_y,
    int8_t* output,
    const union xnn_qs8_add_params* params);
1575
// Signature of an F32 ELU micro-kernel: `float` input `x` and output `y` of
// size `n`, parameters are `union xnn_f32_elu_params`.
typedef void (*xnn_f32_velu_ukernel_function)(
    size_t n,
    const float* x,
    float* y,
    const union xnn_f32_elu_params* params);

// Signature of an F32 square root micro-kernel: same data arguments,
// parameters are `union xnn_f32_sqrt_params`.
typedef void (*xnn_f32_vsqrt_ukernel_function)(
    size_t n,
    const float* x,
    float* y,
    const union xnn_f32_sqrt_params* params);
1587
// Type-erased signature of a binary element-wise micro-kernel: two inputs `a`
// and `b`, an output `y`, and an untyped parameter block. The typed signatures
// below differ only in pointer and parameter types.
typedef void (*xnn_vbinary_ukernel_function)(
    size_t n,
    const void* a,
    const void* b,
    void* y,
    const void* params);

// F16 without fused activation: parameters are `struct xnn_f16_default_params`.
typedef void (*xnn_f16_vbinary_ukernel_function)(
    size_t n,
    const void* a,
    const void* b,
    void* y,
    const struct xnn_f16_default_params* params);

// F16 with min/max parameters: `struct xnn_f16_minmax_params`.
typedef void (*xnn_f16_vbinary_minmax_ukernel_function)(
    size_t n,
    const void* a,
    const void* b,
    void* y,
    const struct xnn_f16_minmax_params* params);

// F32 without fused activation: parameters are `union xnn_f32_default_params`.
typedef void (*xnn_f32_vbinary_ukernel_function)(
    size_t n,
    const float* a,
    const float* b,
    float* y,
    const union xnn_f32_default_params* params);

// F32 with min/max parameters: `union xnn_f32_minmax_params`.
typedef void (*xnn_f32_vbinary_minmax_ukernel_function)(
    size_t n,
    const float* a,
    const float* b,
    float* y,
    const union xnn_f32_minmax_params* params);

// F32 with ReLU parameters: `union xnn_f32_relu_params`.
typedef void (*xnn_f32_vbinary_relu_ukernel_function)(
    size_t n,
    const float* a,
    const float* b,
    float* y,
    const union xnn_f32_relu_params* params);
1629
// Type-erased signature of a unary element-wise micro-kernel. Its argument
// types are the same as those of `xnn_univector_ukernel_function`.
typedef void (*xnn_vunary_ukernel_function)(
    size_t n,
    const void* x,
    void* y,
    const void* params);

// F32 variant: `float` data; the parameter block remains untyped.
typedef void (*xnn_f32_vunary_ukernel_function)(
    size_t n,
    const float* x,
    float* y,
    const void* params);
1641
// Type-erased signature of a VMULCADDC micro-kernel: sizes `m` and `c`, an
// input `x` and output `y` each with its own stride, weights `w`, and an
// untyped parameter block.
typedef void (*xnn_vmulcaddc_ukernel_function)(
    size_t m,
    size_t c,
    const void* x,
    size_t x_stride,
    const void* w,
    void* y,
    size_t y_stride,
    const void* params);

// F16 variant: untyped data, parameters are `struct xnn_f16_minmax_params`.
typedef void (*xnn_f16_vmulcaddc_ukernel_function)(
    size_t m,
    size_t c,
    const void* x,
    size_t x_stride,
    const void* w,
    void* y,
    size_t y_stride,
    const struct xnn_f16_minmax_params* params);

// F32 variant: `float` data, parameters are `union xnn_f32_minmax_params`.
typedef void (*xnn_f32_vmulcaddc_ukernel_function)(
    size_t m,
    size_t c,
    const float* x,
    size_t x_stride,
    const float* w,
    float* y,
    size_t y_stride,
    const union xnn_f32_minmax_params* params);
1671
// Type-erased signature of a PReLU micro-kernel: sizes `mr` and `n`, an input
// `x` and output `y` each with its own stride, and weights `w`. There is no
// parameter block.
typedef void (*xnn_prelu_ukernel_function)(
    size_t mr,
    size_t n,
    const void* x,
    size_t x_stride,
    const void* w,
    void* y,
    size_t y_stride);

// F16 variant: data stays untyped, so the argument types are identical to the
// generic signature above.
typedef void (*xnn_f16_prelu_ukernel_function)(
    size_t mr,
    size_t n,
    const void* x,
    size_t x_stride,
    const void* w,
    void* y,
    size_t y_stride);

// F32 variant: `float` data and weights.
typedef void (*xnn_f32_prelu_ukernel_function)(
    size_t mr,
    size_t n,
    const float* x,
    size_t x_stride,
    const float* w,
    float* y,
    size_t y_stride);
1698
// F32 RADDEXPMINUSMAX micro-kernel signature: reads `n` worth of `input`,
// takes `max` by value, and writes its result through `sum`.
typedef void (*xnn_f32_raddexpminusmax_ukernel_function)(
    size_t n,
    const float* input,
    float* sum,
    float max);

// F32 RADDSTOREEXPMINUSMAX micro-kernel signature: as above, with an
// additional `output` array argument.
typedef void (*xnn_f32_raddstoreexpminusmax_ukernel_function)(
    size_t n,
    const float* input,
    float* output,
    float* sum,
    float max);

// F32 VSCALEEXPMINUSMAX micro-kernel signature: `input` and `output` arrays
// plus `max` and `scale` passed by value.
typedef void (*xnn_f32_vscaleexpminusmax_ukernel_function)(
    size_t n,
    const float* input,
    float* output,
    float max,
    float scale);

// F32 VSCALE micro-kernel signature: input `x`, output `y`, and a scalar `c`
// passed by value.
typedef void (*xnn_f32_vscale_ukernel_function)(
    size_t n,
    const float* x,
    float* y,
    float c);
1724
// Reduce-Add Extended ("mantissa" + "exponent") Exponentials.
// Reads `n` worth of `input` and writes its result through `sum`.
typedef void (*xnn_f32_raddextexp_ukernel_function)(
    size_t n,
    const float* input,
    float* sum);

// Vector Scale Extended ("mantissa" + "exponent") Exponentials.
// The scale is passed as two separate `float` values: mantissa and exponent.
typedef void (*xnn_f32_vscaleextexp_ukernel_function)(
    size_t n,
    const float* input,
    float* output,
    float scale_mantissa,
    float scale_exponent);
1738
1739 struct xnn_hmp_gemm_ukernel {
1740 xnn_gemm_ukernel_function function[XNN_MAX_UARCH_TYPES];
1741 };
1742
// Builds a GEMM micro-kernel table in which every micro-architecture slot
// refers to the same `function`.
static inline struct xnn_hmp_gemm_ukernel xnn_init_hmp_gemm_ukernel(xnn_gemm_ukernel_function function) {
  // The initializer sets slot 0; the loop fills any remaining slots.
  struct xnn_hmp_gemm_ukernel ukernel = { function };
  for (size_t i = 1; i < XNN_MAX_UARCH_TYPES; i++) {
    ukernel.function[i] = function;
  }
  return ukernel;
}
1750
xnn_is_hmp_gemm_ukernel(struct xnn_hmp_gemm_ukernel ukernel)1751 static inline bool xnn_is_hmp_gemm_ukernel(struct xnn_hmp_gemm_ukernel ukernel) {
1752 #if XNN_MAX_UARCH_TYPES == 1
1753 return false;
1754 #else
1755 uintptr_t default_function = (uintptr_t) ukernel.function[XNN_UARCH_DEFAULT];
1756 uintptr_t difference = 0;
1757 for (size_t i = 1; i < XNN_MAX_UARCH_TYPES; i++) {
1758 difference |= (default_function ^ (uintptr_t) ukernel.function[i]);
1759 }
1760 return difference != 0;
1761 #endif
1762 }
1763
1764 struct xnn_hmp_igemm_ukernel {
1765 xnn_igemm_ukernel_function function[XNN_MAX_UARCH_TYPES];
1766 };
1767
// Builds an IGEMM micro-kernel table in which every micro-architecture slot
// refers to the same `function`.
static inline struct xnn_hmp_igemm_ukernel xnn_init_hmp_igemm_ukernel(xnn_igemm_ukernel_function function) {
  // The initializer sets slot 0; the loop fills any remaining slots.
  struct xnn_hmp_igemm_ukernel ukernel = { function };
  for (size_t i = 1; i < XNN_MAX_UARCH_TYPES; i++) {
    ukernel.function[i] = function;
  }
  return ukernel;
}
1775
xnn_is_hmp_igemm_ukernel(struct xnn_hmp_igemm_ukernel ukernel)1776 static inline bool xnn_is_hmp_igemm_ukernel(struct xnn_hmp_igemm_ukernel ukernel) {
1777 #if XNN_MAX_UARCH_TYPES == 1
1778 return false;
1779 #else
1780 uintptr_t default_function = (uintptr_t) ukernel.function[XNN_UARCH_DEFAULT];
1781 uintptr_t difference = 0;
1782 for (size_t i = 1; i < XNN_MAX_UARCH_TYPES; i++) {
1783 difference |= (default_function ^ (uintptr_t) ukernel.function[i]);
1784 }
1785 return difference != 0;
1786 #endif
1787 }
1788
1789 struct gemm_fused_ukernels {
1790 struct xnn_hmp_gemm_ukernel gemm;
1791 struct xnn_hmp_igemm_ukernel igemm;
1792 // Optional GEMM and IGEMM micro-kernels with MR=1 and the same NR and KR parameters.
1793 struct xnn_hmp_gemm_ukernel gemm1;
1794 struct xnn_hmp_igemm_ukernel igemm1;
1795 };
1796
1797 struct gemm_parameters {
1798 struct gemm_fused_ukernels minmax;
1799 struct gemm_fused_ukernels relu;
1800 struct gemm_fused_ukernels linear;
1801 uint8_t mr;
1802 uint8_t nr;
1803 uint8_t log2_kr;
1804 uint8_t log2_sr;
1805 };
1806
1807 struct vbinary_fused_ukernels {
1808 xnn_vbinary_ukernel_function op_ukernel;
1809 xnn_vbinary_ukernel_function opc_ukernel;
1810 xnn_vbinary_ukernel_function ropc_ukernel;
1811 };
1812
1813 struct vbinary_parameters {
1814 struct vbinary_fused_ukernels minmax;
1815 struct vbinary_fused_ukernels linear;
1816 // Number of elements in a tile.
1817 // For best efficiency, micro-kernel must process a multiple of this number of elements in each call.
1818 uint8_t element_tile;
1819 };
1820
1821 struct spmm_parameters {
1822 xnn_spmm_ukernel_function ukernel;
1823 // Number of M-dimension elements in a tile.
1824 // Corresponds to a block of pixels in 1x1 Convolution and a block of batch size in Fully Connected operator.
1825 uint8_t mr;
1826 // Number of N-dimension elements in a tile.
1827 // Corresponds to a block of output channels/features in 1x1 Convolution and Fully Connected operator.
1828 uint8_t nr;
1829 };
1830
1831 struct conv_hwc2chw_parameters {
1832 xnn_conv_hwc2chw_ukernel_function ukernel_with_symm_padding;
1833 // Number of output channels in a tile.
1834 // This parameter must be passed as is to weight packing function.
1835 uint8_t output_channel_tile;
1836 // Number of output height pixels in a tile.
1837 // For best efficiency, micro-kernel must produce a multiple of this number of rows in each call.
1838 uint8_t output_height_tile;
1839 // Number of output width pixes in a tile.
1840 uint8_t output_width_tile;
1841 };
1842
1843 struct dwconv2d_chw_parameters {
1844 xnn_dwconv2d_chw_ukernel_function ukernel;
1845 // Number of output width pixels in a tile.
1846 uint8_t output_width_tile;
1847 // Number of output height pixels in a tile.
1848 // For best efficiency, micro-kernel must produce a multiple of this number of rows in each call.
1849 uint8_t output_height_tile;
1850 };
1851
1852 struct gavgpool_cw_parameters {
1853 xnn_gavgpool_cw_ukernel_function ukernel;
1854 // Number of channels in a tile.
1855 // For best efficiency, micro-kernel must process a multiple of this number of channels in each call.
1856 uint8_t channel_tile;
1857 };
1858
1859 union dwconv_fused_ukernels {
1860 xnn_dwconv_unipass_ukernel_function unipass;
1861 xnn_dwconv_multipass_ukernel_function multipass;
1862 };
1863
1864 struct dwconv_parameters {
1865 union dwconv_fused_ukernels minmax;
1866 union dwconv_fused_ukernels linear;
1867 uint8_t channel_tile;
1868 uint8_t primary_tile;
1869 uint8_t incremental_tile;
1870 };
1871
1872 struct depthtospace2d_chw2hwc_parameters {
1873 xnn_depthtospace2d_chw2hwc_ukernel_function ukernel;
1874 // Number of output pixels in a tile.
1875 // For best efficiency, micro-kernel must produce a multiple of this number of pixels in each call.
1876 uint8_t pixel_tile;
1877 // Number of channels in a tile.
1878 // For best efficiency, micro-kernel must process a multiple of this number of channels in each call.
1879 uint8_t channel_tile;
1880 };
1881
1882 struct gavgpool_parameters {
1883 xnn_gavgpool_unipass_ukernel_function up;
1884 xnn_gavgpool_multipass_ukernel_function mp;
1885 uint8_t mr;
1886 };
1887
1888 struct avgpool_parameters {
1889 xnn_avgpool_unipass_ukernel_function up;
1890 xnn_avgpool_multipass_ukernel_function mp;
1891 uint8_t mr;
1892 uint8_t qr;
1893 };
1894
1895 struct pavgpool_parameters {
1896 xnn_pavgpool_unipass_ukernel_function up;
1897 xnn_pavgpool_multipass_ukernel_function mp;
1898 uint8_t mr;
1899 uint8_t qr;
1900 };
1901
1902 struct argmaxpool_parameters {
1903 union {
1904 xnn_argmaxpool_unipass_ukernel_function up;
1905 xnn_argmaxpool_multipass_ukernel_function mp;
1906 };
1907 uint8_t mr;
1908 uint8_t qr;
1909 };
1910
1911 struct maxpool_parameters {
1912 xnn_maxpool_ukernel_function ukernel;
1913 uint8_t mr;
1914 uint8_t qr;
1915 };
1916
1917 struct ibilinear_parameters {
1918 xnn_ibilinear_ukernel_function ukernel;
1919 // Number of output pixels in a tile.
1920 // For best efficiency, micro-kernel must produce a multiple of this number of pixels in each call.
1921 uint8_t pixel_tile;
1922 // Number of channels in a tile.
1923 // For best efficiency, micro-kernel must process a multiple of this number of channels in each call.
1924 uint8_t channel_tile;
1925 };
1926
1927 struct ibilinear_chw_parameters {
1928 xnn_ibilinear_chw_ukernel_function ukernel;
1929 // Number of output pixels in a tile.
1930 // For best efficiency, micro-kernel must produce a multiple of this number of pixels in each call.
1931 uint8_t pixel_tile;
1932 // Number of channels in a tile.
1933 // For best efficiency, micro-kernel must process a multiple of this number of channels in each call.
1934 uint8_t channel_tile;
1935 };
1936
1937 struct zip_parameters {
1938 xnn_zipc_ukernel_function x2;
1939 xnn_zipc_ukernel_function x3;
1940 xnn_zipc_ukernel_function x4;
1941 xnn_zipv_ukernel_function xm;
1942 };
1943
1944 struct prelu_parameters {
1945 xnn_prelu_ukernel_function ukernel;
1946 uint16_t row_tile;
1947 uint16_t channel_tile;
1948 };
1949
1950 struct fill_parameters {
1951 xnn_fill_ukernel_function ukernel;
1952 // Number of rows of inputs processed in one tile.
1953 // For best efficiency, micro-kernel must produce a multiple of this number of rows in each call.
1954 uint8_t row_tile;
1955 };
1956
1957 struct pad_parameters {
1958 xnn_pad_ukernel_function ukernel;
1959 // Number of rows of inputs processed in one tile.
1960 // For best efficiency, micro-kernel must produce a multiple of this number of rows in each call.
1961 uint8_t row_tile;
1962 };
1963
1964 struct vmulcaddc_parameters {
1965 xnn_vmulcaddc_ukernel_function ukernel;
1966 uint8_t channel_tile;
1967 uint8_t row_tile;
1968 };
1969
// Capacities of the per-datatype micro-kernel arrays declared in
// `struct xnn_parameters` (depthwise convolution and argmax pooling).
#define XNN_MAX_QS8_DWCONV_UKERNELS 1
#define XNN_MAX_QU8_DWCONV_UKERNELS 1
#define XNN_MAX_F16_DWCONV_UKERNELS 3
#define XNN_MAX_F32_DWCONV_UKERNELS 3
#define XNN_MAX_F32_ARGMAXPOOL_UKERNELS 3
1975
// Initialization flags. Each flag occupies its own bit so that the flags can
// be combined with bitwise OR in `xnn_parameters.init_flags`.

// Indicates that XNNPACK as a whole has initialized.
// This does not guarantee that any particular microkernels are available.
#define XNN_INIT_FLAG_XNNPACK 0x00000001
// Indicates that F32 XNNPACK microkernels are available for use.
#define XNN_INIT_FLAG_F32     0x00000002
// Indicates that X32 XNNPACK microkernels are available for use.
#define XNN_INIT_FLAG_X32     0x00000004
// Indicates that F16 XNNPACK microkernels are available for use.
#define XNN_INIT_FLAG_F16     0x00000008
// Indicates that X16 XNNPACK microkernels are available for use.
#define XNN_INIT_FLAG_X16     0x00000010
// Indicates that QS8 XNNPACK microkernels are available for use.
#define XNN_INIT_FLAG_QS8     0x00000020
// Indicates that QU8 XNNPACK microkernels are available for use.
#define XNN_INIT_FLAG_QU8     0x00000040
// Indicates that U8 XNNPACK microkernels are available for use.
#define XNN_INIT_FLAG_U8      0x00000080
// Indicates that X8 XNNPACK microkernels are available for use.
#define XNN_INIT_FLAG_X8      0x00000100
// Indicates that XX XNNPACK microkernels are available for use.
#define XNN_INIT_FLAG_XX      0x00000200
// Indicates that CHW XNNPACK microkernels are optimized for the host platform.
#define XNN_INIT_FLAG_CHW_OPT 0x00000400
1999
2000 struct xnn_parameters {
2001 // Bitwise combination of XNN_INIT_FLAG_* flags
2002 uint32_t init_flags;
2003 struct xnn_allocator allocator;
2004 struct {
2005 xnn_univector_ukernel_function copy;
2006 } xx;
2007 struct {
2008 struct gemm_parameters gemm;
2009 struct dwconv_parameters dwconv[XNN_MAX_QS8_DWCONV_UKERNELS];
2010 struct gavgpool_parameters gavgpool;
2011 struct vbinary_parameters vadd;
2012 } qs8;
2013 struct {
2014 struct gemm_parameters gemm;
2015 struct dwconv_parameters dwconv[XNN_MAX_QU8_DWCONV_UKERNELS];
2016 struct avgpool_parameters avgpool;
2017 struct gavgpool_parameters gavgpool;
2018 xnn_vadd_ukernel_function vadd;
2019 } qu8;
2020 struct {
2021 struct maxpool_parameters maxpool;
2022 xnn_univector_ukernel_function clamp;
2023 xnn_u8_lut32norm_ukernel_function lut32norm;
2024 xnn_u8_rmax_ukernel_function rmax;
2025 } u8;
2026 struct {
2027 xnn_x8_lut_ukernel_function lut;
2028 struct zip_parameters zip;
2029 } x8;
2030 struct {
2031 struct gavgpool_parameters gavgpool;
2032 struct gemm_parameters gemm;
2033 struct gemm_parameters gemm2;
2034 struct dwconv_parameters dwconv[XNN_MAX_F16_DWCONV_UKERNELS];
2035 xnn_univector_ukernel_function hswish;
2036 struct vbinary_parameters vadd;
2037 struct vbinary_parameters vmul;
2038 struct vmulcaddc_parameters vmulcaddc;
2039 } f16;
2040 struct {
2041 struct gemm_parameters gemm;
2042 struct gemm_parameters gemm2;
2043 struct dwconv_parameters dwconv[XNN_MAX_F32_DWCONV_UKERNELS];
2044 struct avgpool_parameters avgpool;
2045 struct pavgpool_parameters pavgpool;
2046 struct gavgpool_parameters gavgpool;
2047 struct maxpool_parameters maxpool;
2048 struct argmaxpool_parameters argmaxpool[XNN_MAX_F32_ARGMAXPOOL_UKERNELS];
2049 // Bilinear interpolation (2D).
2050 struct ibilinear_parameters ibilinear;
2051 xnn_univector_ukernel_function abs;
2052 xnn_univector_ukernel_function clamp;
2053 xnn_univector_ukernel_function elu;
2054 xnn_univector_ukernel_function hswish;
2055 xnn_univector_ukernel_function lrelu;
2056 xnn_univector_ukernel_function neg;
2057 xnn_univector_ukernel_function relu;
2058 xnn_univector_ukernel_function rndne;
2059 xnn_univector_ukernel_function rndz;
2060 xnn_univector_ukernel_function rndu;
2061 xnn_univector_ukernel_function rndd;
2062 xnn_univector_ukernel_function sigmoid;
2063 xnn_univector_ukernel_function sqr;
2064 xnn_univector_ukernel_function sqrt;
2065 struct prelu_parameters prelu;
2066 struct vbinary_parameters vadd;
2067 struct vbinary_parameters vdiv;
2068 struct vbinary_parameters vmax;
2069 struct vbinary_parameters vmin;
2070 struct vbinary_parameters vmul;
2071 struct vbinary_parameters vsub;
2072 struct vbinary_parameters vsqrdiff;
2073 struct vmulcaddc_parameters vmulcaddc;
2074 xnn_f32_raddstoreexpminusmax_ukernel_function raddstoreexpminusmax;
2075 xnn_f32_rmax_ukernel_function rmax;
2076 // Sparse Matrix-Dense Matrix Multiplication (NR=1 block).
2077 struct spmm_parameters spmm;
2078 // Sparse Matrix-Dense Matrix Multiplication (NR=2 block).
2079 struct spmm_parameters spmm2;
2080 // Sparse Matrix-Dense Matrix Multiplication (NR=4 block).
2081 struct spmm_parameters spmm4;
2082 // Direct 3x3 stride-2 Convolution with 3 input channels and HWC->CHW layout conversion.
2083 struct conv_hwc2chw_parameters conv_hwc2chw_3x3c3s2;
2084 // Direct 3x3 stride-1 Convolution with padding 1 on left and right in CHW layout.
2085 struct dwconv2d_chw_parameters dwconv2d_chw_3x3;
2086 // Direct 3x3 stride-2 Convolution with padding 1 on left and right in CHW layout.
2087 struct dwconv2d_chw_parameters dwconv2d_chw_3x3s2;
2088 // Direct 5x5 stride-1 Convolution with padding 2 on left and right in CHW layout.
2089 struct dwconv2d_chw_parameters dwconv2d_chw_5x5;
2090 // Direct 5x5 stride-2 Convolution with padding 2 on left and right in CHW layout.
2091 struct dwconv2d_chw_parameters dwconv2d_chw_5x5s2;
2092 // Global Average Pooling in CW layout.
2093 struct gavgpool_cw_parameters gavgpool_cw;
2094 // Bilinear interpolation (2D) in CHW layout.
2095 struct ibilinear_chw_parameters ibilinear_chw;
2096 } f32;
2097 struct {
2098 struct pad_parameters pad;
2099 struct fill_parameters fill;
2100 xnn_unpool_ukernel_function unpool;
2101 struct zip_parameters zip;
2102 // Depth To Space 2D with CHW->HWC layout conversion.
2103 struct depthtospace2d_chw2hwc_parameters depthtospace2d_chw2hwc;
2104 } x32;
2105 };
2106
2107 #ifdef __cplusplus
2108 extern "C" XNN_INTERNAL struct xnn_parameters xnn_params;
2109 #else
2110 extern XNN_INTERNAL struct xnn_parameters xnn_params;
2111 #endif
2112